Outsourcing Report template

Authors

Jolyon Miles-Wilson

Celestin Okoroji

Published

May 17, 2024

NOTE: In plots, where there is “n=”, this figure refers to the total number of respondents in the row/column. This presentation is somewhat misleading and will be changed in future iterations.

Chapter 1

Code
library(haven)
library(poLCA)
library(dplyr)
library(ggplot2)
library(tidyr)
library(skimr)
library(kableExtra)
library(MASS)
library(wesanderson)
library(ggrepel)
library(here)
library(emmeans)
library(Hmisc)
library(sjstats)
library(readr)
Code
rm(list = ls())
colours <- wes_palette("GrandBudapest2",4,"discrete")
better_colours <- c('#8dd3c7','#bebada','#fb8072','#80b1d3','#fdb462')
many_colours <- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928','#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f')
Code
#data <- haven::read_sav("../Data/2024-04-25 - Cleaned_Data.sav")
data <- readRDS("../Data/2024-05-13 - Cleaned_Data.rds") 
Code
url <- "https://www.ons.gov.uk/file?uri=/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/datasets/nationallabourmarketsummarybyregions01/current/lmregsummaryfebruary2024.xls"

filename <- basename(url)
filepath <- here("./data/", filename)
  
if(!file.exists(filepath)){
  cat("Downloading data\n")
  download.file(url, destfile = filepath, mode = "wb")
} else{
  cat("Data already in directory. Loading it.\n")
}
Data already in directory. Loading it.
Code
employed <- readxl::read_xls(filepath, sheet = "S01.1", range = "F13:F24", col_names = F)
area_names <- readxl::read_xls(filepath, sheet = "S01.1", range = "B13:B24",col_names = F)

rgn_empl_denoms <- data.frame(area_names, employed) %>%
  mutate(across(where(is.numeric), ~.*1000)) # *1000 to get real number

colnames(rgn_empl_denoms) <- c("Region","Employed")

rgn_empl_denoms <- rgn_empl_denoms %>%
  mutate(
    Weight = Employed/sum(Employed)
  )

Using our working definition, how many of us could be described as outsourced?

Code
total_outsourced <- data %>%
  group_by(outsourcing_status) %>%
  summarise(
    Sum = sum(NatRepemployees)
  ) %>%
  mutate(
    Proportion = Sum / sum(Sum),
    Percentage = 100 * Proportion
  )

readr::write_csv(total_outsourced, file="../outputs/data/total_outsourced.csv")

# Create function to find nearest denominator to express as a fraction.
f <- function(x) ifelse(abs(1/floor(1/x) - x) < abs(1/ceiling(1/x) - x),floor(1/x),ceiling(1/x))

According to our definition, 1 in 6 UK workers are outsourced.

Based on this definition, we’ve found that just under 17% of UK workers are ‘outsourced’1. Who makes up this group of 17% of UK workers?

Code
total_outsourced <- data %>%
  group_by(outsourcing_group) %>%
  summarise(
    Sum = sum(NatRepemployees)
  ) %>%
  mutate(
    Proportion = Sum / sum(Sum),
    Percentage = 100 * Proportion
  )

readr::write_csv(total_outsourced, file="../outputs/data/total_outsourced.csv")

In terms of the the different possible types of outsourced groups, the numbers are as follows:

  1. Definitely outsrouced: 11%
  2. Likely agency: 3%
  3. High indicators: 3%

!Placeholder for Venn diagram

Chapter 2: Who are the UK’s outsourced workers?

Top-line findings

Who are the UK’s outsourced workers?

Pay2

Code
# filter to just cases where income is abovve the fifth percentile and lower than the 95th? I.e., drop the top and bottom 5%.
income_statistics <- data %>%
  filter(income_drop_all == 0 & !is.na(income_annual_all)) %>%
  group_by(outsourcing_status) %>%
  summarise(
    mean = weighted.mean(income_annual_all, w = NatRepemployees, na.rm = T),
    median = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(.5), na.rm = T),
    min = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(0), na.rm = T),
    max = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(1), na.rm = T),
    stdev = sqrt(wtd.var(income_annual_all, w = NatRepemployees, na.rm = T))
  )

knitr::kable(income_statistics, 
             digits = 2, 
             col.names = c("Outsourcing group",
                           "Mean",
                           "Median",
                           "Min",
                           "Max",
                           "Standard dev.")) %>%
  kable_styling(full_width = F)
Outsourcing group Mean Median Min Max Standard dev.
Not outsourced 27174.96 25200.5 2000 68000 13400.97
Outsourced 25104.04 24000.0 2400 68000 13090.35
Code
readr::write_csv(income_statistics, file="../outputs/data/income_stats.csv")
Code
# plot the distribution of income for the two groups
data %>%
  filter(income_drop_all == 0 & !is.na(income_annual_all)) %>%
  ggplot(., aes(outsourcing_status, income_annual_all)) + 
  geom_violin() +
  geom_boxplot(width = 0.3) +
  geom_text(inherit.aes=F, data=income_statistics, aes(outsourcing_status, y = 6e+04), label=paste0("Mean = ", round(income_statistics$mean,0),"\n", "Median = ", income_statistics$median), nudge_x = 0.1, hjust=0) +
  coord_cartesian(xlim=c(1,2.5)) +
  theme_minimal() +
  xlab("Outsourcing status") + ylab("Annual income") +
  coord_cartesian(ylim = c(plyr::round_any(min(income_statistics$min), 5000, f = floor),plyr::round_any(max(income_statistics$max),5000, f = ceiling))) +
  scale_y_continuous(breaks = seq(plyr::round_any(min(income_statistics$min), 5000, f = ceiling), plyr::round_any(max(income_statistics$max),5000, f = ceiling), 10000))

The distribution for the different outsourcing groups is shown below. It indicates that income is particularly low for the ‘outsourced’ and ‘likely agency’ workers, whilst average income for the ‘high indicators’ workers is notably higher. This means that, were we not to consider the high indicators group, the difference in income between outsrouced and non-outsourced workers would be larger.

Code
income_statistics <- data %>%
  filter(income_drop_all == 0 & !is.na(income_annual_all)) %>%
  group_by(outsourcing_group) %>%
  summarise(
    n = n(),
    mean = weighted.mean(income_annual_all, w = NatRepemployees, na.rm = T),
    median = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(.5), na.rm = T),
    min = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(0), na.rm = T),
    max = wtd.quantile(income_annual_all, w = NatRepemployees, probs = c(1), na.rm = T),
    stdev = sqrt(wtd.var(income_annual_all, w = NatRepemployees, na.rm = T))
  )

data %>%
  filter(income_drop_all == 0 & !is.na(income_annual_all)) %>%
  ggplot(., aes(outsourcing_group, income_annual_all)) + 
  geom_violin() +
  geom_boxplot(width = 0.3) +
  geom_text(inherit.aes=F, data=income_statistics, aes(outsourcing_group, y = 6e+04), label=paste0("Mean = ", round(income_statistics$mean,0),"\n", "Median = ", round(income_statistics$median,0),"\n N = ", income_statistics$n), nudge_x = 0.1, hjust=0) +
  coord_cartesian(xlim=c(1,2.5)) +
  theme_minimal() +
  xlab("Outsourcing group") + ylab("Annual income") +
  coord_cartesian(ylim = c(plyr::round_any(min(income_statistics$min), 5000, f = floor),plyr::round_any(max(income_statistics$max),5000, f = ceiling))) +
  scale_y_continuous(breaks = seq(plyr::round_any(min(income_statistics$min), 5000, f = ceiling), plyr::round_any(max(income_statistics$max),5000, f = ceiling), 10000))

Code
temp_data <- data %>%
  filter(income_drop == 0 & !is.na(income_annual)) 

# ttest <- t.test(temp_data[which(temp_data$outsourcing_status=="Outsourced"),"income_annual"], 
#        temp_data[which(temp_data$outsourcing_status=="Not outsourced"),"income_annual"]
# )

ttest <-  sjstats::weighted_ttest(income_annual ~ outsourcing_status + NatRepemployees, temp_data)
#cohens_d(ttest)

Although the average pay between non-outsourced and outsourced workers looks similar, a t-test finds that there is a marginally significant difference; outsourced workers are on average paid less than non-outsourced workers (t(1511.07) = 3.97, p = 0).

Below, we run a linear regression testing whether the relationship between outsourcing status and annual income is influenced by income group (not low vs low), controlling for age, gender, ethnicity, and region. We do indeed find a significant interaction effect. The figure below plots this.

Code
test <- lm(income_annual ~ Age + Gender + Ethnicity + Region + outsourcing_status*income_group, data, weights = NatRepemployees)
# summary(test)

emmeans(test, specs = "outsourcing_status", by = "income_group")
income_group = Not low:
 outsourcing_status emmean    SE   df lower.CL upper.CL
 Not outsourced      62912 13148 7360    37139    88686
 Outsourced          90189 13564 7360    63598   116779

income_group = Low:
 outsourcing_status emmean    SE   df lower.CL upper.CL
 Not outsourced      23036 13200 7360    -2840    48912
 Outsourced          21766 13846 7360    -5375    48908

Results are averaged over the levels of: Gender, Region 
Confidence level used: 0.95 
Code
sjPlot::plot_model(test, type = "int")

The results here indicate that among workers that are not paid below our low pay threshold, an outsourced worker can typically be expected to earn considerably more than a non-outsourced worker, whereas among workers that are paid below our low pay threshold, an outsourced worker can typically be expected to be paid the same as a non-outsourced worker (maybe slightly less). Of note is that for both pay groups, the variance in pay is higher for the outsourced groups.

Major group code

Comparison of Majorgroupcode indicates that a higher proportion of outsourced people work in Elementary Occupations, compared to non-outsourced people. A lower proportion of outsourced people work in administrative and secretarial occupations, associate professional occupations and professional occupations.

Code
mgc_summary <- data %>%
  group_by(outsourcing_status,Majorgroupcode) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum)
  )

readr::write_csv(mgc_summary, "../outputs/data/majorgroupcode_summary.csv")
Code
mgc_summary %>%
  ggplot(aes(outsourcing_status, perc, fill = as.factor(Majorgroupcode))) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values=many_colours)

Code
mgc_key <- data.frame("number" = seq(1,11,1),
                            "Major group code" = c( levels(haven::as_factor(mgc_summary$Majorgroupcode)),NA))

mgc_key %>%
  kable() %>%
  kable_styling(full_width = F)
number Major.group.code
1 ADMINISTRATIVE AND SECRETARIAL OCCUPATIONS
2 ASSOCIATE PROFESSIONAL OCCUPATIONS
3 CARING, LEISURE AND OTHER SERVICE OCCUPATIONS
4 ELEMENTARY OCCUPATIONS
5 MANAGERS, DIRECTORS AND SENIOR OFFICIALS
6 NA
7 PROCESS, PLANT AND MACHINE OPERATIVES
8 PROFESSIONAL OCCUPATIONS
9 SALES AND CUSTOMER SERVICE OCCUPATIONS
10 SKILLED TRADES OCCUPATIONS
11 NA

The table below shows the percentage of outsourced and non-outsourced workers in each majorgroupcode, as well as the difference between them (positive numbers in the difference column indicate occupations that are more common for outsourced work, negative numbers indicate occupations that are less common for outsourced work).3

Code
mgc_summary %>%
  select(outsourcing_status, Majorgroupcode, perc) %>%
  pivot_wider(names_from = outsourcing_status, values_from = perc) %>%
  mutate(
    Majorgroupcode = haven::as_factor(Majorgroupcode),
    Difference = `Outsourced` - `Not outsourced`
    ) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F)
Majorgroupcode Not outsourced Outsourced Difference
ADMINISTRATIVE AND SECRETARIAL OCCUPATIONS 14.09 10.23 -3.85
ASSOCIATE PROFESSIONAL OCCUPATIONS 13.23 9.35 -3.88
CARING, LEISURE AND OTHER SERVICE OCCUPATIONS 8.85 10.00 1.14
ELEMENTARY OCCUPATIONS 8.84 17.12 8.28
MANAGERS, DIRECTORS AND SENIOR OFFICIALS 11.07 12.23 1.16
NA 0.41 0.29 -0.12
PROCESS, PLANT AND MACHINE OPERATIVES 6.19 6.99 0.80
PROFESSIONAL OCCUPATIONS 19.92 17.57 -2.35
SALES AND CUSTOMER SERVICE OCCUPATIONS 12.06 10.97 -1.10
SKILLED TRADES OCCUPATIONS 5.32 5.24 -0.07
NA 0.01 NA NA
Code
mgc_summary_2 <- data %>%
  group_by(Majorgroupcode,outsourcing_status) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum)
  ) %>% 
  drop_na()

mgc_summary_2_wider <- mgc_summary_2 %>%
  select(outsourcing_status, Majorgroupcode, perc) %>%
  pivot_wider(names_from = outsourcing_status, values_from = perc) %>%
  mutate(
    Majorgroupcode = haven::as_factor(Majorgroupcode),
    #Difference = `Outsourced` - `Not outsourced`,
    Ratio_10 = 10 * (`Outsourced` / `Not outsourced`) # indicates how many outsourced workers there are for every 10 non outsourced
    )

mgc_summary_2_wider %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F)
Majorgroupcode Not outsourced Outsourced Ratio_10
ADMINISTRATIVE AND SECRETARIAL OCCUPATIONS 87.19 12.81 1.47
ASSOCIATE PROFESSIONAL OCCUPATIONS 87.49 12.51 1.43
CARING, LEISURE AND OTHER SERVICE OCCUPATIONS 81.41 18.59 2.28
ELEMENTARY OCCUPATIONS 71.86 28.14 3.92
MANAGERS, DIRECTORS AND SENIOR OFFICIALS 81.73 18.27 2.24
NA 87.59 12.41 1.42
PROCESS, PLANT AND MACHINE OPERATIVES 81.41 18.59 2.28
PROFESSIONAL OCCUPATIONS 84.86 15.14 1.78
SALES AND CUSTOMER SERVICE OCCUPATIONS 84.47 15.53 1.84
SKILLED TRADES OCCUPATIONS 83.37 16.63 1.99
Code
readr::write_csv(mgc_summary_2, "../outputs/data/majorgroupcode_summary_2_long.csv")

readr::write_csv(mgc_summary_2_wider, "../outputs/data/majorgroupcode_summary_2_wide.csv")

The plot below summarises the average pay (x-axis) in each occupation (y-axis) for outsourced and non-outsourced workers (dot colour), as well as the size of the respective workforce (size of dots). Here the size of the dot represents the percentage of workers within the sector who are outsourced (blue) or not outsourced (purple).4

Code
mgc_summary_2 %>%
  mutate(
    Majorgroupcode = haven::as_factor(Majorgroupcode)
  ) %>%
  ggplot(., aes(Majorgroupcode, wtd_avg_income, size = perc, colour = outsourcing_status)) +
    geom_point(position = "dodge") + 
    coord_flip() +
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank())+
  scale_y_continuous(breaks=seq(0,max(mgc_summary$wtd_avg_income), 25000)) +
  scale_colour_manual(values=colours) +
  guides(size=FALSE) # remove size legend as gauging size is difficult

It shows, as might be expected, the size of the outsourced workforce for each sector is smaller than the non-outsourced workforce, but the ratio is not the same for all sectors. The sector with the largest non-outsourced:outsourced ratio is Elementary occupations; for every 10 non-outsourced workers, there are 4 outsourced workers. This is followed by caring, leisure, and other service occupations, and process, plant and machine operatives, both of which employ 2 outsourced workers for every 10 non-outsourced workers.

Notably, in elementary occupations and sales and customer service occupations, outsourced workers are on average paid more than non-outsourced workers. In contrast, workers in process, plant and machine operations are paid less if they are outsourced than if they are not outsourced.

Unit occupations

A deep dive into elementary occupations, process occupations, and caring occupations reveals that there are differences between occupations in the size of the outsourced workforce and pay.

Elementary occupations5
Code
elem_summary <- data %>%
  filter(Majorgroupcode %in% c(4)) %>%
  group_by(UnitOccupation,outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum)
  ) %>% 
  drop_na()

elem_summary %>%
  mutate(
    UnitOccupation = haven::as_factor(UnitOccupation)
  ) %>%
  ggplot(., aes(wtd_avg_income,UnitOccupation, size = perc, colour = outsourcing_status)) +
    geom_point() + 
    geom_text_repel(inherit.aes = F, aes(wtd_avg_income, UnitOccupation, colour = outsourcing_status, label=paste0("n=",n))) +
  #coord_flip() +

  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  # scale_x_continuous(breaks=scales::pretty(10))
  scale_colour_manual(values=colours) +

    guides(size=FALSE) 

Code
readr::write_csv(elem_summary, "../outputs/data/elementary_occs_summary.csv")
Process occupations6
Code
proc_summary <- data %>%
  filter(Majorgroupcode %in% c(7)) %>%
  group_by(UnitOccupation,outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum)
  ) %>% 
  drop_na()

proc_summary %>%
  mutate(
    UnitOccupation = haven::as_factor(UnitOccupation)
  ) %>%
  ggplot(., aes(wtd_avg_income,UnitOccupation, size = perc, colour = outsourcing_status)) +
    geom_point() + 
    geom_text_repel(inherit.aes = F, aes(wtd_avg_income, UnitOccupation, colour = outsourcing_status, label=paste0("n=",n))) +
  #coord_flip() +

  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  # scale_x_continuous(breaks=scales::pretty(10))
  scale_colour_manual(values=colours) +

    guides(size=FALSE) 

Code
readr::write_csv(proc_summary, "../outputs/data/process_occs_summary.csv")
Caring occupations7
Code
caring_summary <- data %>%
  filter(Majorgroupcode %in% c(3)) %>%
  group_by(UnitOccupation,outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum)
  ) %>% 
  drop_na()

caring_summary %>%
  mutate(
    UnitOccupation = haven::as_factor(UnitOccupation)
  ) %>%
  ggplot(., aes(wtd_avg_income,UnitOccupation, size = perc, colour = outsourcing_status)) +
    geom_point() + 
    geom_text_repel(inherit.aes = F, aes(wtd_avg_income, UnitOccupation, colour = outsourcing_status, label=paste0("n=",n))) +
  #coord_flip() +

  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  # scale_x_continuous(breaks=scales::pretty(10))
  scale_colour_manual(values=colours) +

    guides(size=FALSE) 

Code
readr::write_csv(caring_summary, "../outputs/data/caring_occs_summary.csv")

Sector

Sector within outsourcing status

This framing shows how outsourced and non-outsourced workers are distributed across sectors.

Code
sector_summary <- data %>%
  group_by(outsourcing_status, SectorName, SectorName_labelled) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(outsourcing_status) %>%
  mutate(
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )



readr::write_csv(sector_summary, "../outputs/data/sector_summary.csv")

The plot below shows the sector breakdown by outsourcing status. I.e. this is how outsourced and not outsourced workers are distributed across sectors.8

Note

With this framing we could say things like “as an outsourced worker, you are x times more likely to work in than a non-outsourced worker”

Code
plot_data <- sector_summary %>%
  drop_na(SectorName_labelled) %>%
  droplevels()

plot_data %>%
  ggplot(aes(outsourcing_status, perc, fill = as.factor(SectorName))) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values=many_colours)

Code
sector_key <- data.frame("number" = seq(1,length(unique(plot_data$SectorName)),1),
                            "Sector" = levels(plot_data$SectorName_labelled))

sector_key %>%
  kable() %>%
  kable_styling(full_width = F)
number Sector
1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES
2 ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES
3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US
4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES
5 AGRICULTURE, FORESTRY AND FISHING
6 ARTS, ENTERTAINMENT AND RECREATION
7 CONSTRUCTION
8 EDUCATION
9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY
10 FINANCIAL AND INSURANCE ACTIVITIES
11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES
12 INFORMATION AND COMMUNICATION
13 MANUFACTURING
14 MINING AND QUARRYING
15 Not found
16 OTHER SERVICE ACTIVITIES
17 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES
18 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY
19 REAL ESTATE ACTIVITIES
20 TRANSPORTATION AND STORAGE
21 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES
22 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES

The table below shows what percentage of outsourced and non-outsourced workers work in each sector, as well as the difference between them (positive numbers in the difference column indicate sectors that are more common for outsourced work, negative numbers indicate sectors that are less common for outsourced work).9

It indicates that sectors that are less common for outsourced workers compared to not outsourced are:

  • PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY
  • EDUCATION

And sectors that are more common for outsourced workers compared to not outsourced are:

  • ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES

For these sectors that differ most in the concentration of the outsourced workforce, there is a pattern (if three data points can be called that) whereby in sectors with a relatively higher concentration of outsourced workers, outsourced workers are paid less, and in sectors with a relatively lower concentration of outsourced workers, outsourced workers are paid more. This is tenuous, but it is an example of the heterogeneity in income between sectors, and should be explored further.

Code
sector_summary_2 <- plot_data %>%
  select(outsourcing_status, SectorName, SectorName_short, perc, wtd_avg_income) %>%
  pivot_wider(names_from = outsourcing_status, values_from = c(perc, wtd_avg_income)) %>%
  mutate(
    perc_difference = `perc_Outsourced` - `perc_Not outsourced`,
    income_difference = `wtd_avg_income_Outsourced` - `wtd_avg_income_Not outsourced`
    ) %>%
  relocate(
    perc_difference, .after = perc_Outsourced
  )

sector_summary_2 %>%
  arrange(desc(abs(perc_difference))) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F)
SectorName SectorName_short perc_Not outsourced perc_Outsourced perc_difference wtd_avg_income_Not outsourced wtd_avg_income_Outsourced income_difference
19 Public administration and defence 7.98 4.41 -3.57 44269.46 80688.11 36418.64
4 Administrative and support service activities 2.76 6.30 3.54 34008.23 25773.63 -8234.59
8 Education 11.00 8.17 -2.83 42664.00 66203.95 23539.95
13 Manufacturing 8.39 7.28 -1.10 47496.16 61992.04 14495.88
11 Human health and social work activities 16.74 15.81 -0.93 34943.28 37315.45 2372.18
21 Transportation and storage 4.86 5.79 0.93 50974.51 40399.44 -10575.07
7 Construction 3.22 4.13 0.91 57729.30 79257.70 21528.41
17 Other service activities 2.77 3.66 0.89 39021.24 49283.90 10262.65
22 Water supply 0.76 1.62 0.85 46514.17 50316.51 3802.34
1 Accommodation and food service activities 5.89 6.71 0.83 23814.49 46561.01 22746.52
12 Information and communication 4.58 5.32 0.74 63987.91 97333.04 33345.13
18 Professional, scientific and technical activities 3.92 4.66 0.74 53888.29 69683.01 15794.72
3 Activities of households as employers 0.31 0.86 0.54 57350.92 21162.34 -36188.59
23 Wholesale and retail trade 15.43 14.93 -0.49 36426.50 59181.02 22754.52
16 Not found 0.30 0.68 0.38 30372.90 30421.16 48.26
20 Real estate activities 1.25 0.97 -0.29 39630.78 27557.00 -12073.78
6 Arts, entertainment and recreation 1.81 1.61 -0.20 26930.44 21607.35 -5323.09
5 Agriculture, forestry and fishing 0.42 0.22 -0.20 36423.88 25767.70 -10656.18
9 Electricity, gas, steam and air conditioning supply 1.21 1.32 0.10 84854.37 73130.20 -11724.17
10 Financial and insurance activities 4.91 4.81 -0.10 62097.41 143214.64 81117.23
2 Activities of extraterritorial organisations and bodies 0.04 NA NA 32367.32 NA NA
14 Mining and quarrying 0.10 NA NA 54859.57 NA NA
Code
readr::write_csv(sector_summary_2, file="../outputs/data/sector_summary_2.csv")

The plot below plots the percentage difference in the concentration of outsourced vs non-outsourced workers (i.e. the difference between what proportion of workers of each type are in each sector) against the income difference for that sector (i.e., the difference in the average income between groups). Note that a statistical test of this relationship shows it is non-significant. This plot therefore only serves as an illustration of where workers are situated in terms of sector and pay. A key takeaway here is that there is considerable variation in the difference in pay between outsourced and non-outsourced workers. There also appears to be a central area where the concentration of outsourced vs non-outsourced workers is quite similar, but the pay for outsourced workers is lower. This might indicate sectors where employment of outsourced workers is as common as employment of non-outsourced workers, but where outsourced workers are paid less than non-outsourced workers. These sectors are:

  • 5: AGRICULTURE, FORESTRY AND FISHING
  • 6: ARTS, ENTERTAINMENT AND RECREATION
  • 9: ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY
  • 20: REAL ESTATE ACTIVITIES
Code
annotations <- data.frame(
        xpos = c(-Inf,-Inf,Inf,Inf), # this sets to corners
        ypos =  c(-Inf, Inf,-Inf,Inf), # this sets to corners
        annotateText = c("Outsourced concentration smaller, pay lower","Outsourced concentration smaller, pay higher",
                        "Outsourced concentration larger, pay lower","Outsourced concentration larger, pay higher"),
        hjustvar = c(0,0,1,1), # higher values = right, lower values = left 
        vjustvar = c(-0.5,1,-0.5,1) # higher values = up, lower values = down
        )

sector_summary_2 %>%
  ggplot(aes(perc_difference, income_difference)) +
  geom_point() +
  geom_smooth(method="lm", colour=many_colours[2], se=FALSE) +
  theme_minimal() +
  ylab("Income difference in £ (out. - non-out.)") + xlab("Workforce proportion difference (out. - non-out)") +
  geom_text_repel(aes(label=as.character(SectorName))) +
  geom_text(inherit.aes = F, data = annotations, aes(xpos, ypos, label=annotateText, hjust=hjustvar, vjust=vjustvar)) +
  geom_hline(yintercept = 0, colour="red",linetype="dashed")

Code
test <- lm(income_difference ~ perc_difference, sector_summary_2)
summary(test)

Call:
lm(formula = income_difference ~ perc_difference, data = sector_summary_2)

Residuals:
   Min     1Q Median     3Q    Max 
-43309 -15858   -750   9840  70740 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)  
(Intercept)         9860       5442   1.812   0.0867 .
perc_difference    -5043       3792  -1.330   0.2001  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 24330 on 18 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.08948,   Adjusted R-squared:  0.0389 
F-statistic: 1.769 on 1 and 18 DF,  p-value: 0.2001

Outsourcing status within sector10

This framing shows how sectors are composed, i.e., what proportion of workers in each sector are outsourced vs non-outsourced.

Code
sector_summary_3 <- data %>%
  group_by(SectorName, SectorName_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )

write_csv(sector_summary_3, file="../outputs/data/sector_summary_3.csv")

The plot below shows the proportion of outsourced and not outsourced workers within each sector. I.e. this is showing what sectors have higher and lower proportions of outsourced workers.

Note

With this framing we could say things like “sector a is x times more likely to employ outsourced workers than sector b”

Code
plot_data <- sector_summary_3 %>%
  drop_na(SectorName_short) %>%
  droplevels()

# annotation_df <- plot_data %>%
#   select(SectorName_short, outsourcing_status, perc, n
# mutate(
  
annotation_df <- plot_data %>%
  filter(outsourcing_status == "Not outsourced") %>%
  select(SectorName_short, N) %>%
  mutate(
    ypos = 80
  )


ggplot(plot_data,aes(SectorName_short, perc, fill = outsourcing_status)) +
  geom_col() +
  geom_text(inherit.aes=F,data=annotation_df, aes(x=SectorName_short, y=ypos, label = paste0("N = ", N)), hjust=1, nudge_y = 15) +
  coord_flip() +
  scale_fill_manual(values=many_colours) +
  scale_y_continuous(breaks=seq(0,100,10))

Code
# sector_key <- data.frame("number" = seq(1,length(unique(plot_data$SectorName_labelled)),1),
#                           "Sector" = levels(plot_data$SectorName_labelled))
# 
# sector_key %>%
#   kable() %>%
#   kable_styling(full_width = F)

The table below shows the percentage of outsourced workers in each Sector, ordered descending by percentage. It shows that the top three Sectors with the highest proportion of outsourced workers are:

  • ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US (note that N = 31)
  • ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES
  • WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES

Note that for an undefined sector (‘Not found’) contained one of the largest proportions of outsourced workers (31% of workers in the ‘Not found’ category were outsourced).

Code
plot_data %>%
  filter(outsourcing_status == "Outsourced") %>%
  arrange(desc(perc)) %>%
  select(SectorName_labelled, perc) %>%
  kable() %>%
  kable_styling()
SectorName SectorName_labelled perc
3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US 35.652378
4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES 31.570055
16 Not found 31.317619
22 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES 30.008923
17 OTHER SERVICE ACTIVITIES 21.102417
7 CONSTRUCTION 20.589291
21 TRANSPORTATION AND STORAGE 19.415919
18 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES 19.369714
12 INFORMATION AND COMMUNICATION 19.034279
1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES 18.738635
9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY 18.008232
10 FINANCIAL AND INSURANCE ACTIVITIES 16.529705
23 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES 16.373830
11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 16.037739
6 ARTS, ENTERTAINMENT AND RECREATION 15.255060
13 MANUFACTURING 14.939669
20 REAL ESTATE ACTIVITIES 13.504099
8 EDUCATION 13.065534
19 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY 10.051123
5 AGRICULTURE, FORESTRY AND FISHING 9.709408

Exploring this workforce makeup in the context of income shows that there are some sectors where outsourced workers are paid more and some where they are paid less than non-outsourced workers. The plot below visualises this.

Sectors where outsourced workers are paid less:

  • ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US (note that N = 31)
  • ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES (note that N = 32)
  • AGRICULTURE, FORESTRY AND FISHING
  • ARTS, ENTERTAINMENT AND RECREATION
  • ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY
  • PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY
  • REAL ESTATE ACTIVITIES
  • WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES

Sectors where outsourced workers are paid more:

  • ACCOMMODATION AND FOOD SERVICE ACTIVITIES
  • CONSTRUCTION
  • EDUCATION
  • INFORMATION AND COMMUNICATION
  • MANUFACTURING
  • Not found
  • OTHER SERVICE ACTIVITIES
  • PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES
  • REAL ESTATE ACTIVITIES

Note that in 2 or 3 of the Sectors where outsourced workers are paid less are low-paying Sectors. (this needs to be double-checked)

Code
annotation_df <- sector_summary_3 %>%
  filter(outsourcing_status == "Not outsourced") %>%
  select(SectorName_short, N) %>%
  group_by(SectorName_short) %>%
  summarise(
    N = sum(N)
  ) %>%
    mutate(
    ypos = 110000
  ) 

sector_summary_3 %>%
  # mutate(
  #   SectorName = as.factor(SectorName)
  # ) %>%
  ggplot(., aes(wtd_avg_income,SectorName_short, size = perc, colour = outsourcing_status)) +
    geom_point(position = "dodge") + 
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank())+
      #coord_flip() +
  scale_x_continuous(breaks=seq(0,max(sector_summary$wtd_avg_income), 25000)) +
  scale_colour_manual(values=colours) +
  geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=SectorName_short, label = paste0("N = ", N)), hjust=1, nudge_x=20000) +
  guides(size=FALSE) # remove size legend as gauging size is difficult

Apply the paysplit11
Code
sector_summary_paysplit <- data %>%
  group_by(SectorName, SectorName_labelled, income_group, outsourcing_status) %>%
  drop_na(income_group) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName, income_group) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )

write_csv(sector_summary_paysplit, file="../outputs/sector_summary_paysplit.csv")
Code
for(group in unique(sector_summary_paysplit$income_group)){

  plot_data <- sector_summary_paysplit %>%
    filter(income_group==group) %>%
    drop_na(SectorName_short) %>%
    droplevels()
  
  # set ypos for labels
  max_value <- max(plot_data$perc)
  ypos <- 0.8 * max_value
  
  annotation_df <- plot_data %>%
    filter(outsourcing_status == "Not outsourced") %>%
    select(SectorName_short, N) %>%
    group_by(SectorName_short) %>%
    mutate(
      ypos = ypos
    )
  
  plot <- ggplot(plot_data, aes(SectorName_short, perc, fill = outsourcing_status)) +
    geom_col() +
    geom_text(inherit.aes=F,data=annotation_df, aes(x=SectorName_short, y=ypos, label = paste0("N = ", N)), hjust=1, nudge_y = 15) +
    coord_flip() +
    scale_fill_manual(values=many_colours) +
    scale_y_continuous(breaks=seq(0,100,10)) +
    ggtitle(paste0(group, " income"))
  
  print(plot)
  
}

The percentages below show be read as e.g. 20% of low paid workers in accommodation and food services are outsourced, compared to 16% of not low paid workers in accommodation and food services.

Code
plot_data <- sector_summary_paysplit %>%
  filter(outsourcing_status=="Outsourced")

max_value <- max(plot_data$perc)
ypos <- 0.8 * max_value

annotation_df <- plot_data %>%
  filter(outsourcing_status == "Outsourced") %>%
  select(SectorName_short, income_group, N) %>%
  group_by(SectorName_short) %>%
  mutate(
    
    ypos = ypos
  )

ggplot(plot_data, aes(SectorName_short, perc, fill = income_group)) +
    geom_col(position = position_dodge2()) +
    # geom_text(inherit.aes=F,data=annotation_df, aes(x=SectorName_short, y=ypos, label = paste0("N = ", N)), position = position_dodge(width=2)) +
    coord_flip() +
    scale_fill_manual(values=many_colours) +
    scale_y_continuous(breaks=seq(0,50,10)) +
    ggtitle("Percentage of outsourced workers within\neach sector by income group")

Code
for(group in unique(sector_summary_paysplit$income_group)){
  # set ypos for labels
  max_income <- max(sector_summary_paysplit$wtd_avg_income[which(sector_summary_paysplit$income_group==group)])
  ypos <- 0.8 * max_income
  
  annotation_df <- sector_summary_paysplit %>%
    filter(income_group==group) %>%
    filter(outsourcing_status == "Not outsourced") %>%
    select(SectorName_short, N) %>%
    group_by(SectorName_short) %>%
    summarise(
      N = sum(N)
    ) %>%
    mutate(
      ypos = ypos
    )
  
  plot <- sector_summary_paysplit %>%
    filter(income_group == group) %>%
    ggplot(., aes(wtd_avg_income,SectorName_short, size = perc, colour = outsourcing_status)) +
      geom_point(position = "dodge") + 
    theme_minimal() +
    theme(legend.position = "bottom",
          legend.title = element_blank())+
        #coord_flip() +
    scale_x_continuous(breaks=seq(0,max_income, plyr::round_any(max_income/5, 100))) +
    scale_colour_manual(values=colours) +
    geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=SectorName_short, label = paste0("N = ", N)), hjust=1, nudge_x=20000) +
    guides(size=FALSE) + # remove size legend as gauging size is difficult
    ggtitle(paste0(group, " income"))
  
  print(plot)
}

Sector repeat: Outsourcing group

Code
sector_summary <- data %>%
  group_by(outsourcing_group, SectorName, SectorName_labelled) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_annual, na.rm=T),
    wtd_avg_income = weighted.mean(income_annual, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(outsourcing_group) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )

The plot below shows the distribution of sectors within each outsourcing group. The standout differences are:

  • Greater proprotion of HUMAN HEALTH AND SOCIAL WORK ACTIVITIES in the ‘likely agency’ category, compared to other groups
  • Greater proprotion of CONSTRUCTION in the ‘likely agency’ category, compared to other groups
  • Greater proportion of ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES in the ‘outsourced’ category, compared to other groups
  • Greater proportion of WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES in the ‘high indicators’ category, compared to other groups

(Really, this plot is better for showing the makeup of each type of outsroucing group - comparisons aer better made comparing outsroucing group within sectors. Here is a better way of interpeting these plots):

For the high indicator group, the sector with the largest proprotion of workers was WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES, closely followed by HUMAN HEALTH AND SOCIAL WORK ACTIVITIES.

For the likely agency group, the sector with the largest proprotion of workers was HUMAN HEALTH AND SOCIAL WORK ACTIVITIES.

For the outsourced group, the sector with the largest proprotion of workers was WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES, closely followed by HUMAN HEALTH AND SOCIAL WORK ACTIVITIES.

Note that also for the not outsourced group, the sector with the largest proprotion of workers was HUMAN HEALTH AND SOCIAL WORK ACTIVITIES, closely followed by WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES.

[This might say something about the demographics of the people who were sampled in this survey]

Code
plot_data <- sector_summary %>%
  drop_na(SectorName_labelled) %>%
  droplevels()

plot_data %>%
  ggplot(aes(outsourcing_group, perc, fill = as.factor(SectorName))) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values=many_colours)

Code
sector_key <- data.frame("number" = seq(1,length(unique(plot_data$SectorName)),1),
                            "Sector" = unique(plot_data$SectorName_labelled))

sector_key %>%
  kable() %>%
  kable_styling(full_width = F)
number Sector
1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES
2 ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES
3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US
4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES
5 AGRICULTURE, FORESTRY AND FISHING
6 ARTS, ENTERTAINMENT AND RECREATION
7 CONSTRUCTION
8 EDUCATION
9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY
10 FINANCIAL AND INSURANCE ACTIVITIES
11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES
12 INFORMATION AND COMMUNICATION
13 MANUFACTURING
14 MINING AND QUARRYING
15 Not found
16 OTHER SERVICE ACTIVITIES
17 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES
18 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY
19 REAL ESTATE ACTIVITIES
20 TRANSPORTATION AND STORAGE
21 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES
22 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES
Code
plot_data %>%
  select(outsourcing_group, SectorName, SectorName_labelled, perc) %>%
  group_by(outsourcing_group) %>%
  arrange(desc(perc), .by_group=TRUE) %>%
  kable() %>%
  kable_styling(full_width = F)
outsourcing_group SectorName SectorName_labelled perc
Not outsourced 11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 16.7360613
Not outsourced 23 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES 15.4267755
Not outsourced 8 EDUCATION 10.9976142
Not outsourced 13 MANUFACTURING 8.3872365
Not outsourced 19 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY 7.9832203
Not outsourced 1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES 5.8872856
Not outsourced 10 FINANCIAL AND INSURANCE ACTIVITIES 4.9095444
Not outsourced 21 TRANSPORTATION AND STORAGE 4.8584753
Not outsourced 12 INFORMATION AND COMMUNICATION 4.5805150
Not outsourced 18 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES 3.9235934
Not outsourced 7 CONSTRUCTION 3.2214668
Not outsourced 17 OTHER SERVICE ACTIVITIES 2.7676269
Not outsourced 4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES 2.7603460
Not outsourced 6 ARTS, ENTERTAINMENT AND RECREATION 1.8053220
Not outsourced 20 REAL ESTATE ACTIVITIES 1.2521925
Not outsourced 9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY 1.2117371
Not outsourced 22 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES 0.7627515
Not outsourced 5 AGRICULTURE, FORESTRY AND FISHING 0.4225971
Not outsourced 3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US 0.3123761
Not outsourced 16 Not found 0.3030015
Not outsourced 14 MINING AND QUARRYING 0.1034233
Not outsourced 2 ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES 0.0431916
Outsourced 23 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES 14.8186202
Outsourced 11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 14.1182403
Outsourced 8 EDUCATION 8.6944943
Outsourced 13 MANUFACTURING 7.1682245
Outsourced 4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES 7.1400736
Outsourced 1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES 6.7289225
Outsourced 21 TRANSPORTATION AND STORAGE 5.5518952
Outsourced 12 INFORMATION AND COMMUNICATION 5.2270278
Outsourced 18 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES 5.1295921
Outsourced 10 FINANCIAL AND INSURANCE ACTIVITIES 4.8852436
Outsourced 19 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY 4.6701148
Outsourced 17 OTHER SERVICE ACTIVITIES 4.0418607
Outsourced 7 CONSTRUCTION 3.6893071
Outsourced 22 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES 1.7774160
Outsourced 6 ARTS, ENTERTAINMENT AND RECREATION 1.6604915
Outsourced 9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY 1.2565601
Outsourced 3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US 0.9852412
Outsourced 20 REAL ESTATE ACTIVITIES 0.8869119
Outsourced 16 Not found 0.6672183
Outsourced 5 AGRICULTURE, FORESTRY AND FISHING 0.2801044
Likely agency 11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 22.3782074
Likely agency 23 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES 11.7348709
Likely agency 1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES 9.5233357
Likely agency 13 MANUFACTURING 7.7512619
Likely agency 7 CONSTRUCTION 6.8292108
Likely agency 8 EDUCATION 5.9626362
Likely agency 21 TRANSPORTATION AND STORAGE 5.5687150
Likely agency 12 INFORMATION AND COMMUNICATION 5.1177601
Likely agency 4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES 5.0349824
Likely agency 19 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY 4.9276124
Likely agency 10 FINANCIAL AND INSURANCE ACTIVITIES 4.2492270
Likely agency 18 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES 2.5851476
Likely agency 17 OTHER SERVICE ACTIVITIES 1.8324543
Likely agency 6 ARTS, ENTERTAINMENT AND RECREATION 1.6318044
Likely agency 22 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES 1.2974489
Likely agency 9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY 1.1617726
Likely agency 3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US 0.9038069
Likely agency 16 Not found 0.7203230
Likely agency 20 REAL ESTATE ACTIVITIES 0.5694658
Likely agency 5 AGRICULTURE, FORESTRY AND FISHING 0.2199565
High indicators 23 WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES 18.4511626
High indicators 11 HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 16.5456358
High indicators 8 EDUCATION 8.1099485
High indicators 13 MANUFACTURING 7.3160094
High indicators 21 TRANSPORTATION AND STORAGE 6.9715309
High indicators 12 INFORMATION AND COMMUNICATION 5.9221553
High indicators 10 FINANCIAL AND INSURANCE ACTIVITIES 5.0133674
High indicators 18 PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES 4.6896460
High indicators 4 ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES 4.0053150
High indicators 1 ACCOMMODATION AND FOOD SERVICE ACTIVITIES 3.9750691
High indicators 17 OTHER SERVICE ACTIVITIES 3.8158394
High indicators 7 CONSTRUCTION 3.3883241
High indicators 19 PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY 2.8467744
High indicators 9 ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY 1.7075018
High indicators 20 REAL ESTATE ACTIVITIES 1.6732156
High indicators 6 ARTS, ENTERTAINMENT AND RECREATION 1.3609799
High indicators 22 WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES 1.2566393
High indicators 16 Not found 0.7135371
High indicators 3 ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US 0.2744929

13/05 MORE TO DO HERE FOR SECTOR AS ABOVE

Ethnicity

Code
ethnicity_statistics <- data %>%
  group_by(outsourcing_status, Ethnicity_labelled) %>%
  summarise(
    n = n(), # count cases
    Frequency = sum(NatRepemployees) # count weighted cases
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
    separate_wider_delim(Ethnicity_short, 
                         names = c("Ethnicity_short", "Ethnicity detail"), 
                         delim = stringr::regex(" / |, "),  # use multiple delims
                         too_few = "align_start",
                         too_many = "merge")

readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_1.csv")
Code
ethnicities <- as.vector(unique(haven::as_factor(data$Ethnicity)))
non_white_ethnicities <- ethnicities[!(ethnicities %in% "English / Welsh / Scottish / Northern Irish / British")]


# Will throw NA warning. I think this OK but investigate how to avoid the problem
summary_table <- data %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity)
  ) %>%
  mutate(
    Ethnicity = forcats::fct_collapse(as.character(Ethnicity),
                                      "White British" = c("English / Welsh / Scottish / Northern Irish / British"),
                                      "Non-White British" = non_white_ethnicities)
  ) %>%
  group_by(outsourcing_status, Ethnicity) %>%
  summarise(
    n = n()
  ) %>%
  mutate(
    Sum = sum(n),
    Percentage = 100 * (n / Sum)
  )

group_1 <- t(tibble("present"=summary_table[which(summary_table["Ethnicity"]=="White British" & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"],
"not present" = summary_table[which(summary_table["Ethnicity"]=="Non-White British" & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"]
))

group_2 <- t(tibble("present"=summary_table[which(summary_table["Ethnicity"]=="White British" & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"],
"not present" = summary_table[which(summary_table["Ethnicity"]=="Non-White British" & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"]
))


comp_mat <- as.matrix(cbind(group_2, group_1)) # matrix for crosstable
x2 <- gmodels::CrossTable(comp_mat, fisher=TRUE)
# `r if(x2[["chisq"]][["p.value"]] < .001, "< .001", paste0("= ", round(x2[["chisq"]][["p.value"]],2)))`).
# (chi-square = `r round(x2[["chisq"]][["statistic"]][["X-squared"]],2)`, *p* = `r round(x2[["chisq"]][["p.value"]],3)`).

Breaking down by ethnicity shows that the outsourced group has a lower proportion of White workers compared to the non-outsourced group. For example, in the outsourced group, the proportion of British (‘White’) workers is 66.91 %, compared to 78.01% in the not outsourced group. Needless to say, this means that there is a correspondingly higher proportion of workers from minority backgrounds in the outsourced group, notably from African (4.1%) and other White backgrounds (5.5, amongst others.12 These differences mean that outsourced workers are 1.87 times more likely to be a member of minority ethnicity than non-outsourced workers.

Code
ethnicity_statistics %>%
  # mutate(
  #   Ethnicity = haven::as_factor(Ethnicity)
  #   ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
outsourcing_status Ethnicity_labelled n Frequency N Sum Percentage Ethnicity_short Ethnicity detail
Not outsourced English / Welsh / Scottish / Northern Irish / British 6623 6589.16 8472 8446.64 78.01 English Welsh / Scottish / Northern Irish / British
Not outsourced Irish 111 98.75 8472 8446.64 1.17 Irish NA
Not outsourced Gypsy or Irish Traveller 6 8.32 8472 8446.64 0.10 Gypsy or Irish Traveller NA
Not outsourced Roma 4 5.24 8472 8446.64 0.06 Roma NA
Not outsourced Any other White background 369 385.42 8472 8446.64 4.56 Any other White background NA
Not outsourced White and Black Caribbean 84 53.22 8472 8446.64 0.63 White and Black Caribbean NA
Not outsourced White and Black African 40 20.43 8472 8446.64 0.24 White and Black African NA
Not outsourced White and Asian 61 32.39 8472 8446.64 0.38 White and Asian NA
Not outsourced Any other Mixed / Multiple ethnic background 59 33.60 8472 8446.64 0.40 Any other Mixed Multiple ethnic background
Not outsourced Indian 170 237.76 8472 8446.64 2.81 Indian NA
Not outsourced Pakistani 93 96.09 8472 8446.64 1.14 Pakistani NA
Not outsourced Bangladeshi 47 53.46 8472 8446.64 0.63 Bangladeshi NA
Not outsourced Chinese 70 124.50 8472 8446.64 1.47 Chinese NA
Not outsourced Any other Asian background 57 118.35 8472 8446.64 1.40 Any other Asian background NA
Not outsourced African 232 157.09 8472 8446.64 1.86 African NA
Not outsourced Caribbean 74 56.56 8472 8446.64 0.67 Caribbean NA
Not outsourced Any other Black, Black British, or Caribbean background 36 25.60 8472 8446.64 0.30 Any other Black Black British, or Caribbean background
Not outsourced Arab 12 20.47 8472 8446.64 0.24 Arab NA
Not outsourced Any other ethnic group 13 23.40 8472 8446.64 0.28 Any other ethnic group NA
Not outsourced Don’t think of myself as any of these 7 5.87 8472 8446.64 0.07 Don’t think of myself as any of these NA
Not outsourced Prefer not to say 22 23.54 8472 8446.64 0.28 Prefer not to say NA
Not outsourced NA 282 277.44 8472 8446.64 3.28 NA NA
Outsourced English / Welsh / Scottish / Northern Irish / British 1124 1143.07 1683 1708.36 66.91 English Welsh / Scottish / Northern Irish / British
Outsourced Irish 17 14.86 1683 1708.36 0.87 Irish NA
Outsourced Gypsy or Irish Traveller 2 2.48 1683 1708.36 0.14 Gypsy or Irish Traveller NA
Outsourced Roma 3 2.25 1683 1708.36 0.13 Roma NA
Outsourced Any other White background 80 93.96 1683 1708.36 5.50 Any other White background NA
Outsourced White and Black Caribbean 11 5.53 1683 1708.36 0.32 White and Black Caribbean NA
Outsourced White and Black African 26 14.61 1683 1708.36 0.86 White and Black African NA
Outsourced White and Asian 12 9.13 1683 1708.36 0.53 White and Asian NA
Outsourced Any other Mixed / Multiple ethnic background 23 15.89 1683 1708.36 0.93 Any other Mixed Multiple ethnic background
Outsourced Indian 55 73.98 1683 1708.36 4.33 Indian NA
Outsourced Pakistani 49 53.85 1683 1708.36 3.15 Pakistani NA
Outsourced Bangladeshi 21 23.04 1683 1708.36 1.35 Bangladeshi NA
Outsourced Chinese 12 21.04 1683 1708.36 1.23 Chinese NA
Outsourced Any other Asian background 26 44.80 1683 1708.36 2.62 Any other Asian background NA
Outsourced African 111 69.96 1683 1708.36 4.10 African NA
Outsourced Caribbean 15 15.11 1683 1708.36 0.88 Caribbean NA
Outsourced Any other Black, Black British, or Caribbean background 16 11.79 1683 1708.36 0.69 Any other Black Black British, or Caribbean background
Outsourced Arab 7 12.03 1683 1708.36 0.70 Arab NA
Outsourced Any other ethnic group 3 7.06 1683 1708.36 0.41 Any other ethnic group NA
Outsourced Don’t think of myself as any of these 5 2.94 1683 1708.36 0.17 Don’t think of myself as any of these NA
Outsourced Prefer not to say 4 6.92 1683 1708.36 0.40 Prefer not to say NA
Outsourced NA 61 64.07 1683 1708.36 3.75 NA NA
Code
data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(outsourcing_status, Ethnicity) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    n = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    N = n()
  ) %>%
  ggplot(., aes(outsourcing_status, Percentage, fill = as.factor(Ethnicity))) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
  coord_flip() +
  scale_fill_manual(values = many_colours, name = "Ethnicity") +
  xlab("Outsourcing group") +
  theme_minimal()

Code
ethnicity_key <- data.frame("number" = seq(1,22,1),
                            "ethnicity" = c(levels(ethnicity_statistics$Ethnicity_labelled), NA))

ethnicity_key %>%
  kable() %>%
  kable_styling(full_width = F)
number ethnicity
1 English / Welsh / Scottish / Northern Irish / British
2 Irish
3 Gypsy or Irish Traveller
4 Roma
5 Any other White background
6 White and Black Caribbean
7 White and Black African
8 White and Asian
9 Any other Mixed / Multiple ethnic background
10 Indian
11 Pakistani
12 Bangladeshi
13 Chinese
14 Any other Asian background
15 African
16 Caribbean
17 Any other Black, Black British, or Caribbean background
18 Arab
19 Any other ethnic group
20 Don’t think of myself as any of these
21 Prefer not to say
22 NA
Code
ethnicity_statistics_2 <- data %>%
  group_by(outsourcing_status, Ethnicity_collapsed) %>%
  summarise(
    n = n(), # count cases
    Frequency = sum(NatRepemployees) # count weighted cases
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) #%>%
    # separate_wider_delim(Ethnicity_short, 
    #                      names = c("Ethnicity_short", "Ethnicity detail"), 
    #                      delim = stringr::regex(" / |, "),  # use multiple delims
    #                      too_few = "align_start",
    #                      too_many = "merge")

#readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_2.csv")
Code
ethnicity_statistics_2 %>%
  # mutate(
  #   Ethnicity = haven::as_factor(Ethnicity)
  #   ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
outsourcing_status Ethnicity_collapsed n Frequency N Sum Percentage
Not outsourced White 6734 6687.91 8472 8446.64 79.18
Not outsourced White other 379 398.98 8472 8446.64 4.72
Not outsourced Black Caribbean 158 109.78 8472 8446.64 1.30
Not outsourced Black African 272 177.52 8472 8446.64 2.10
Not outsourced Mixed other 120 65.99 8472 8446.64 0.78
Not outsourced South Asian 310 387.30 8472 8446.64 4.59
Not outsourced East Asian 70 124.50 8472 8446.64 1.47
Not outsourced Other 381 448.60 8472 8446.64 5.31
Not outsourced Black other 36 25.60 8472 8446.64 0.30
Not outsourced Arab 12 20.47 8472 8446.64 0.24
Outsourced White 1141 1157.93 1683 1708.36 67.78
Outsourced White other 85 98.68 1683 1708.36 5.78
Outsourced Black Caribbean 26 20.64 1683 1708.36 1.21
Outsourced Black African 137 84.58 1683 1708.36 4.95
Outsourced Mixed other 35 25.02 1683 1708.36 1.46
Outsourced South Asian 125 150.87 1683 1708.36 8.83
Outsourced East Asian 12 21.04 1683 1708.36 1.23
Outsourced Other 99 125.78 1683 1708.36 7.36
Outsourced Black other 16 11.79 1683 1708.36 0.69
Outsourced Arab 7 12.03 1683 1708.36 0.70
Code
data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(outsourcing_status, Ethnicity_collapsed) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    n = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    N = n()
  ) %>%
  ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_collapsed)) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$N)) +
  coord_flip() +
  scale_fill_manual(values = many_colours, name = "Ethnicity") +
  xlab("Outsourcing group") +
  theme_minimal()

Code
mod <- glm(outsourcing_status ~ Ethnicity_collapsed, data, family="binomial", weights=NatRepemployees)
summary(mod)

Call:
glm(formula = outsourcing_status ~ Ethnicity_collapsed, family = "binomial", 
    data = data, weights = NatRepemployees)

Coefficients:
                                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)                        -1.75366    0.03183 -55.095  < 2e-16 ***
Ethnicity_collapsedWhite other      0.35669    0.11685   3.053 0.002268 ** 
Ethnicity_collapsedBlack Caribbean  0.08232    0.24203   0.340 0.733754    
Ethnicity_collapsedBlack African    1.01225    0.13590   7.448 9.45e-14 ***
Ethnicity_collapsedMixed other      0.78383    0.23693   3.308 0.000939 ***
Ethnicity_collapsedSouth Asian      0.81086    0.10111   8.020 1.06e-15 ***
Ethnicity_collapsedEast Asian      -0.02436    0.23787  -0.102 0.918417    
Ethnicity_collapsedOther            0.48211    0.10579   4.557 5.19e-06 ***
Ethnicity_collapsedBlack other      0.97803    0.35344   2.767 0.005655 ** 
Ethnicity_collapsedArab             1.22173    0.36469   3.350 0.000808 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 9201.8  on 10154  degrees of freedom
Residual deviance: 9065.3  on 10145  degrees of freedom
AIC: 10088

Number of Fisher Scoring iterations: 4
Code
coef_table <- data.frame("estimate" = mod[["coefficients"]]) %>%
  mutate(
    or = round(exp(estimate),2)
  )

Comparison of ethnicities indicates that some groups are statistically more likely to be outsourced than others

  • Black African workers are 2.75 times more likely than White workers to be outsourced.
  • Mixed other workers are 2.19 times more likely than White workers to be outsourced.
  • South Asian workers are 2.25 times more likely than White workers to be outsourced.
  • Other workers are 1.62 times more likely than White workers to be outsourced.
  • Black other workers are 2.66 times more likely than White workers to be outsourced.
  • Arab workers are 3.39 times more likely than White workers to be outsourced.

White other, Black Caribbean, and East Asian workers are no more or less likely than White workers to be outsourced.

Paysplit13

Code
ethnicity_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, Ethnicity_labelled) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
    separate_wider_delim(Ethnicity_short, 
                         names = c("Ethnicity_short", "Ethnicity detail"), 
                         delim = stringr::regex(" / |, "),  # use multiple delims
                         too_few = "align_start",
                         too_many = "merge")

readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_paysplit_1.csv")
Code
temp_data <-  ethnicity_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(Ethnicity_short, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Code
# ethnicity_summary_paysplit %>%
#   drop_na(income_group) %>%
#   ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_short)) +
#   facet_grid(rows=~income_group) +
#   geom_col(colour="black") +
#   #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
#   coord_flip() +
#   scale_fill_manual(values = many_colours, name = "Ethnicity") +
#   xlab("Outsourcing group") +
#   theme_minimal() +
#   theme(
#     legend.position = "bottom"
#   )
Code
ethnicity_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, Ethnicity_collapsed) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) 

#readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_paysplit_1.csv")
Code
temp_data <-  ethnicity_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(Ethnicity_collapsed, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Code
# ethnicity_summary_paysplit %>%
#   drop_na(income_group) %>%
#   ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_short)) +
#   facet_grid(rows=~income_group) +
#   geom_col(colour="black") +
#   #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
#   coord_flip() +
#   scale_fill_manual(values = many_colours, name = "Ethnicity") +
#   xlab("Outsourcing group") +
#   theme_minimal() +
#   theme(
#     legend.position = "bottom"
#   )
Code
mod <- glm(outsourcing_status ~ income_group*Ethnicity_collapsed, data, family="binomial")
summary(mod)

Call:
glm(formula = outsourcing_status ~ income_group * Ethnicity_collapsed, 
    family = "binomial", data = data)

Coefficients:
                                                   Estimate Std. Error z value
(Intercept)                                        -1.83224    0.04438 -41.286
income_groupLow                                     0.17246    0.06883   2.506
Ethnicity_collapsedWhite other                      0.33035    0.16107   2.051
Ethnicity_collapsedBlack Caribbean                 -0.19371    0.31031  -0.624
Ethnicity_collapsedBlack African                    1.23328    0.13921   8.859
Ethnicity_collapsedMixed other                      0.23623    0.28662   0.824
Ethnicity_collapsedSouth Asian                      0.92653    0.14299   6.480
Ethnicity_collapsedEast Asian                       0.17932    0.36655   0.489
Ethnicity_collapsedOther                            0.42737    0.18195   2.349
Ethnicity_collapsedBlack other                      1.19039    0.39319   3.027
Ethnicity_collapsedArab                             1.32142    0.51830   2.550
income_groupLow:Ethnicity_collapsedWhite other     -0.13691    0.27257  -0.502
income_groupLow:Ethnicity_collapsedBlack Caribbean  0.55421    0.45290   1.224
income_groupLow:Ethnicity_collapsedBlack African   -0.44101    0.23655  -1.864
income_groupLow:Ethnicity_collapsedMixed other      0.64585    0.41324   1.563
income_groupLow:Ethnicity_collapsedSouth Asian     -0.01517    0.23811  -0.064
income_groupLow:Ethnicity_collapsedEast Asian      -0.12898    0.73289  -0.176
income_groupLow:Ethnicity_collapsedOther            0.33341    0.27581   1.209
income_groupLow:Ethnicity_collapsedBlack other     -0.70927    0.69591  -1.019
income_groupLow:Ethnicity_collapsedArab            -0.35479    1.33094  -0.267
                                                   Pr(>|z|)    
(Intercept)                                         < 2e-16 ***
income_groupLow                                     0.01222 *  
Ethnicity_collapsedWhite other                      0.04027 *  
Ethnicity_collapsedBlack Caribbean                  0.53246    
Ethnicity_collapsedBlack African                    < 2e-16 ***
Ethnicity_collapsedMixed other                      0.40983    
Ethnicity_collapsedSouth Asian                     9.19e-11 ***
Ethnicity_collapsedEast Asian                       0.62469    
Ethnicity_collapsedOther                            0.01883 *  
Ethnicity_collapsedBlack other                      0.00247 ** 
Ethnicity_collapsedArab                             0.01079 *  
income_groupLow:Ethnicity_collapsedWhite other      0.61548    
income_groupLow:Ethnicity_collapsedBlack Caribbean  0.22107    
income_groupLow:Ethnicity_collapsedBlack African    0.06228 .  
income_groupLow:Ethnicity_collapsedMixed other      0.11808    
income_groupLow:Ethnicity_collapsedSouth Asian      0.94921    
income_groupLow:Ethnicity_collapsedEast Asian       0.86030    
income_groupLow:Ethnicity_collapsedOther            0.22673    
income_groupLow:Ethnicity_collapsedBlack other      0.30811    
income_groupLow:Ethnicity_collapsedArab             0.78980    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8127.6  on 8942  degrees of freedom
Residual deviance: 7956.9  on 8923  degrees of freedom
  (1212 observations deleted due to missingness)
AIC: 7996.9

Number of Fisher Scoring iterations: 4
Code
test_data <- data %>%
  drop_na(income_group) %>%
  mutate(
    income_group = factor(income_group, levels = c("Not low", "Low")),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
  separate_wider_delim(Ethnicity_short, 
                       names = c("Ethnicity_short", "Ethnicity detail"), 
                       delim = " / ", 
                       too_few = "align_start",
                       too_many = "merge") %>%
  mutate(
    Ethnicity_short = forcats::fct_relevel(factor(Ethnicity_short), "English")
  )


test <- glm(outsourcing_status ~ Age + Gender + income_group + Ethnicity_short, family = "binomial", data = test_data)
summary(test)

Call:
glm(formula = outsourcing_status ~ Age + Gender + income_group + 
    Ethnicity_short, family = "binomial", data = test_data)

Coefficients:
                                                                        Estimate
(Intercept)                                                            -1.122623
Age                                                                    -0.024661
GenderMale                                                              0.531731
GenderOther                                                             0.128987
GenderPrefer not to say                                                -0.240452
income_groupLow                                                         0.276827
Ethnicity_shortAfrican                                                  0.750804
Ethnicity_shortAny other Asian background                               0.691922
Ethnicity_shortAny other Black, Black British, or Caribbean background  0.752002
Ethnicity_shortAny other ethnic group                                   0.220800
Ethnicity_shortAny other Mixed                                          0.673451
Ethnicity_shortAny other White background                               0.171750
Ethnicity_shortArab                                                     0.989803
Ethnicity_shortBangladeshi                                              0.703248
Ethnicity_shortCaribbean                                                0.183752
Ethnicity_shortChinese                                                  0.021374
Ethnicity_shortDon’t think of myself as any of these                    1.449163
Ethnicity_shortGypsy or Irish Traveller                                 0.282974
Ethnicity_shortIndian                                                   0.548848
Ethnicity_shortIrish                                                   -0.126226
Ethnicity_shortPakistani                                                0.864916
Ethnicity_shortPrefer not to say                                       -0.006321
Ethnicity_shortRoma                                                     1.082635
Ethnicity_shortWhite and Asian                                         -0.090237
Ethnicity_shortWhite and Black African                                  1.155726
Ethnicity_shortWhite and Black Caribbean                               -0.279236
                                                                       Std. Error
(Intercept)                                                              0.111181
Age                                                                      0.002392
GenderMale                                                               0.061348
GenderOther                                                              0.780590
GenderPrefer not to say                                                  1.056849
income_groupLow                                                          0.061893
Ethnicity_shortAfrican                                                   0.125846
Ethnicity_shortAny other Asian background                                0.258347
Ethnicity_shortAny other Black, Black British, or Caribbean background   0.330047
Ethnicity_shortAny other ethnic group                                    0.647274
Ethnicity_shortAny other Mixed                                           0.264347
Ethnicity_shortAny other White background                                0.135112
Ethnicity_shortArab                                                      0.483591
Ethnicity_shortBangladeshi                                               0.286337
Ethnicity_shortCaribbean                                                 0.300048
Ethnicity_shortChinese                                                   0.319975
Ethnicity_shortDon’t think of myself as any of these                     0.659571
Ethnicity_shortGypsy or Irish Traveller                                  0.843433
Ethnicity_shortIndian                                                    0.163023
Ethnicity_shortIrish                                                     0.292763
Ethnicity_shortPakistani                                                 0.188944
Ethnicity_shortPrefer not to say                                         0.633334
Ethnicity_shortRoma                                                      0.769140
Ethnicity_shortWhite and Asian                                           0.335327
Ethnicity_shortWhite and Black African                                   0.262938
Ethnicity_shortWhite and Black Caribbean                                 0.345023
                                                                       z value
(Intercept)                                                            -10.097
Age                                                                    -10.311
GenderMale                                                               8.667
GenderOther                                                              0.165
GenderPrefer not to say                                                 -0.228
income_groupLow                                                          4.473
Ethnicity_shortAfrican                                                   5.966
Ethnicity_shortAny other Asian background                                2.678
Ethnicity_shortAny other Black, Black British, or Caribbean background   2.278
Ethnicity_shortAny other ethnic group                                    0.341
Ethnicity_shortAny other Mixed                                           2.548
Ethnicity_shortAny other White background                                1.271
Ethnicity_shortArab                                                      2.047
Ethnicity_shortBangladeshi                                               2.456
Ethnicity_shortCaribbean                                                 0.612
Ethnicity_shortChinese                                                   0.067
Ethnicity_shortDon’t think of myself as any of these                     2.197
Ethnicity_shortGypsy or Irish Traveller                                  0.336
Ethnicity_shortIndian                                                    3.367
Ethnicity_shortIrish                                                    -0.431
Ethnicity_shortPakistani                                                 4.578
Ethnicity_shortPrefer not to say                                        -0.010
Ethnicity_shortRoma                                                      1.408
Ethnicity_shortWhite and Asian                                          -0.269
Ethnicity_shortWhite and Black African                                   4.395
Ethnicity_shortWhite and Black Caribbean                                -0.809
                                                                       Pr(>|z|)
(Intercept)                                                             < 2e-16
Age                                                                     < 2e-16
GenderMale                                                              < 2e-16
GenderOther                                                            0.868753
GenderPrefer not to say                                                0.820021
income_groupLow                                                        7.73e-06
Ethnicity_shortAfrican                                                 2.43e-09
Ethnicity_shortAny other Asian background                              0.007400
Ethnicity_shortAny other Black, Black British, or Caribbean background 0.022698
Ethnicity_shortAny other ethnic group                                  0.733011
Ethnicity_shortAny other Mixed                                         0.010847
Ethnicity_shortAny other White background                              0.203669
Ethnicity_shortArab                                                    0.040680
Ethnicity_shortBangladeshi                                             0.014049
Ethnicity_shortCaribbean                                               0.540267
Ethnicity_shortChinese                                                 0.946741
Ethnicity_shortDon’t think of myself as any of these                   0.028011
Ethnicity_shortGypsy or Irish Traveller                                0.737246
Ethnicity_shortIndian                                                  0.000761
Ethnicity_shortIrish                                                   0.666357
Ethnicity_shortPakistani                                               4.70e-06
Ethnicity_shortPrefer not to say                                       0.992037
Ethnicity_shortRoma                                                    0.159252
Ethnicity_shortWhite and Asian                                         0.787852
Ethnicity_shortWhite and Black African                                 1.11e-05
Ethnicity_shortWhite and Black Caribbean                               0.418329
                                                                          
(Intercept)                                                            ***
Age                                                                    ***
GenderMale                                                             ***
GenderOther                                                               
GenderPrefer not to say                                                   
income_groupLow                                                        ***
Ethnicity_shortAfrican                                                 ***
Ethnicity_shortAny other Asian background                              ** 
Ethnicity_shortAny other Black, Black British, or Caribbean background *  
Ethnicity_shortAny other ethnic group                                     
Ethnicity_shortAny other Mixed                                         *  
Ethnicity_shortAny other White background                                 
Ethnicity_shortArab                                                    *  
Ethnicity_shortBangladeshi                                             *  
Ethnicity_shortCaribbean                                                  
Ethnicity_shortChinese                                                    
Ethnicity_shortDon’t think of myself as any of these                   *  
Ethnicity_shortGypsy or Irish Traveller                                   
Ethnicity_shortIndian                                                  ***
Ethnicity_shortIrish                                                      
Ethnicity_shortPakistani                                               ***
Ethnicity_shortPrefer not to say                                          
Ethnicity_shortRoma                                                       
Ethnicity_shortWhite and Asian                                            
Ethnicity_shortWhite and Black African                                 ***
Ethnicity_shortWhite and Black Caribbean                                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 7918.6  on 8739  degrees of freedom
Residual deviance: 7567.2  on 8714  degrees of freedom
  (203 observations deleted due to missingness)
AIC: 7619.2

Number of Fisher Scoring iterations: 4

Another way of looking at this is to calculate, for each ethnicity, the proportion of workers in each outsourcing group. Doing so yields the plot below.14

Code
ethnicity_statistics <- data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(Ethnicity, outsourcing_status) %>%
  summarise(
    Frequency = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) %>%
  rename(
    `Outsourcing group` = outsourcing_status
  )

ethnicity_statistics %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity)
    ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
Ethnicity Outsourcing group Frequency Sum Percentage
English / Welsh / Scottish / Northern Irish / British Not outsourced 6623 7747 85.49
English / Welsh / Scottish / Northern Irish / British Outsourced 1124 7747 14.51
Irish Not outsourced 111 128 86.72
Irish Outsourced 17 128 13.28
Gypsy or Irish Traveller Not outsourced 6 8 75.00
Gypsy or Irish Traveller Outsourced 2 8 25.00
Roma Not outsourced 4 7 57.14
Roma Outsourced 3 7 42.86
Any other White background Not outsourced 369 449 82.18
Any other White background Outsourced 80 449 17.82
White and Black Caribbean Not outsourced 84 95 88.42
White and Black Caribbean Outsourced 11 95 11.58
White and Black African Not outsourced 40 66 60.61
White and Black African Outsourced 26 66 39.39
White and Asian Not outsourced 61 73 83.56
White and Asian Outsourced 12 73 16.44
Any other Mixed / Multiple ethnic background Not outsourced 59 82 71.95
Any other Mixed / Multiple ethnic background Outsourced 23 82 28.05
Indian Not outsourced 170 225 75.56
Indian Outsourced 55 225 24.44
Pakistani Not outsourced 93 142 65.49
Pakistani Outsourced 49 142 34.51
Bangladeshi Not outsourced 47 68 69.12
Bangladeshi Outsourced 21 68 30.88
Chinese Not outsourced 70 82 85.37
Chinese Outsourced 12 82 14.63
Any other Asian background Not outsourced 57 83 68.67
Any other Asian background Outsourced 26 83 31.33
African Not outsourced 232 343 67.64
African Outsourced 111 343 32.36
Caribbean Not outsourced 74 89 83.15
Caribbean Outsourced 15 89 16.85
Any other Black, Black British, or Caribbean background Not outsourced 36 52 69.23
Any other Black, Black British, or Caribbean background Outsourced 16 52 30.77
Arab Not outsourced 12 19 63.16
Arab Outsourced 7 19 36.84
Any other ethnic group Not outsourced 13 16 81.25
Any other ethnic group Outsourced 3 16 18.75
Don’t think of myself as any of these Not outsourced 7 12 58.33
Don’t think of myself as any of these Outsourced 5 12 41.67
Prefer not to say Not outsourced 22 26 84.62
Prefer not to say Outsourced 4 26 15.38
NA Not outsourced 282 343 82.22
NA Outsourced 61 343 17.78
Code
# ordering_df <- ethnicity_statistics %>%
#   filter(`Outsourcing group` == "Outsourced") %>%
#   mutate(
#     Ethnicity = haven::as_factor(Ethnicity),
#     Ethnicity = factor(Ethnicity),
#     Ethnicity = forcats::fct_reorder(Ethnicity, Percentage)
#   )

ethnicity_statistics %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity)
    ) %>%
  ggplot(., aes(Ethnicity, Percentage, fill = `Outsourcing group`)) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$Ethnicity, y = 75, label = paste0("n=", ethnicity_statistics$Sum)) +
  coord_flip() +
  scale_fill_manual(values=many_colours, name = "Ethnicity")

Code
write_csv(ethnicity_statistics, file="../outputs/data/ethnicity_stats_2.csv")

Ethnicity repeat: Outsourcing groups

Code
ethnicity_statistics <- data %>%
  group_by(outsourcing_group, Ethnicity_labelled) %>%
  summarise(
    n = n(), # count cases
    Frequency = sum(NatRepemployees) # count weighted cases
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
    separate_wider_delim(Ethnicity_short, 
                         names = c("Ethnicity_short", "Ethnicity detail"), 
                         delim = stringr::regex(" / |, "),  # use multiple delims
                         too_few = "align_start",
                         too_many = "merge")

readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_outsourcing_groups_1.csv")
Code
# test <- multinom(outsourcing_group ~ Ethnicity_collapsed, data, weights = NatRepemployees)
# summary(test)
# 
# z <- summary(test)$coefficients/summary(test)$standard.errors
# z
# 
# p <- (1 - pnorm(abs(z), 0, 1)) * 2
# p
# 
# # Assuming your dataframe is named 'p'
# p_2 <- apply(p, 2, function(x) ifelse(x < 0.01, 1, NA))
# 
# sig_ors <- exp(summary(test)$coefficients * p_2)

# we can take the results of this forward and plot the ors
Code
ethnicity_statistics %>%
  # mutate(
  #   Ethnicity = haven::as_factor(Ethnicity)
  #   ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
outsourcing_group Ethnicity_labelled n Frequency N Sum Percentage Ethnicity_short Ethnicity detail
Not outsourced English / Welsh / Scottish / Northern Irish / British 6623 6589.16 8472 8446.64 78.01 English Welsh / Scottish / Northern Irish / British
Not outsourced Irish 111 98.75 8472 8446.64 1.17 Irish NA
Not outsourced Gypsy or Irish Traveller 6 8.32 8472 8446.64 0.10 Gypsy or Irish Traveller NA
Not outsourced Roma 4 5.24 8472 8446.64 0.06 Roma NA
Not outsourced Any other White background 369 385.42 8472 8446.64 4.56 Any other White background NA
Not outsourced White and Black Caribbean 84 53.22 8472 8446.64 0.63 White and Black Caribbean NA
Not outsourced White and Black African 40 20.43 8472 8446.64 0.24 White and Black African NA
Not outsourced White and Asian 61 32.39 8472 8446.64 0.38 White and Asian NA
Not outsourced Any other Mixed / Multiple ethnic background 59 33.60 8472 8446.64 0.40 Any other Mixed Multiple ethnic background
Not outsourced Indian 170 237.76 8472 8446.64 2.81 Indian NA
Not outsourced Pakistani 93 96.09 8472 8446.64 1.14 Pakistani NA
Not outsourced Bangladeshi 47 53.46 8472 8446.64 0.63 Bangladeshi NA
Not outsourced Chinese 70 124.50 8472 8446.64 1.47 Chinese NA
Not outsourced Any other Asian background 57 118.35 8472 8446.64 1.40 Any other Asian background NA
Not outsourced African 232 157.09 8472 8446.64 1.86 African NA
Not outsourced Caribbean 74 56.56 8472 8446.64 0.67 Caribbean NA
Not outsourced Any other Black, Black British, or Caribbean background 36 25.60 8472 8446.64 0.30 Any other Black Black British, or Caribbean background
Not outsourced Arab 12 20.47 8472 8446.64 0.24 Arab NA
Not outsourced Any other ethnic group 13 23.40 8472 8446.64 0.28 Any other ethnic group NA
Not outsourced Don’t think of myself as any of these 7 5.87 8472 8446.64 0.07 Don’t think of myself as any of these NA
Not outsourced Prefer not to say 22 23.54 8472 8446.64 0.28 Prefer not to say NA
Not outsourced NA 282 277.44 8472 8446.64 3.28 NA NA
Outsourced English / Welsh / Scottish / Northern Irish / British 742 778.19 1123 1161.08 67.02 English Welsh / Scottish / Northern Irish / British
Outsourced Irish 12 11.23 1123 1161.08 0.97 Irish NA
Outsourced Gypsy or Irish Traveller 2 2.48 1123 1161.08 0.21 Gypsy or Irish Traveller NA
Outsourced Roma 2 1.48 1123 1161.08 0.13 Roma NA
Outsourced Any other White background 63 72.25 1123 1161.08 6.22 Any other White background NA
Outsourced White and Black Caribbean 8 3.87 1123 1161.08 0.33 White and Black Caribbean NA
Outsourced White and Black African 21 11.08 1123 1161.08 0.95 White and Black African NA
Outsourced White and Asian 9 5.80 1123 1161.08 0.50 White and Asian NA
Outsourced Any other Mixed / Multiple ethnic background 15 9.84 1123 1161.08 0.85 Any other Mixed Multiple ethnic background
Outsourced Indian 32 43.96 1123 1161.08 3.79 Indian NA
Outsourced Pakistani 29 32.69 1123 1161.08 2.82 Pakistani NA
Outsourced Bangladeshi 15 17.95 1123 1161.08 1.55 Bangladeshi NA
Outsourced Chinese 7 12.75 1123 1161.08 1.10 Chinese NA
Outsourced Any other Asian background 17 30.35 1123 1161.08 2.61 Any other Asian background NA
Outsourced African 74 47.20 1123 1161.08 4.07 African NA
Outsourced Caribbean 10 10.40 1123 1161.08 0.90 Caribbean NA
Outsourced Any other Black, Black British, or Caribbean background 13 9.46 1123 1161.08 0.81 Any other Black Black British, or Caribbean background
Outsourced Arab 3 4.97 1123 1161.08 0.43 Arab NA
Outsourced Any other ethnic group 1 1.52 1123 1161.08 0.13 Any other ethnic group NA
Outsourced Don’t think of myself as any of these 4 2.54 1123 1161.08 0.22 Don’t think of myself as any of these NA
Outsourced Prefer not to say 1 1.67 1123 1161.08 0.14 Prefer not to say NA
Outsourced NA 43 49.38 1123 1161.08 4.25 NA NA
Likely agency English / Welsh / Scottish / Northern Irish / British 180 174.33 269 266.54 65.41 English Welsh / Scottish / Northern Irish / British
Likely agency Irish 1 0.66 269 266.54 0.25 Irish NA
Likely agency Roma 1 0.77 269 266.54 0.29 Roma NA
Likely agency Any other White background 10 13.33 269 266.54 5.00 Any other White background NA
Likely agency White and Black Caribbean 1 0.69 269 266.54 0.26 White and Black Caribbean NA
Likely agency White and Black African 2 0.91 269 266.54 0.34 White and Black African NA
Likely agency White and Asian 2 2.54 269 266.54 0.95 White and Asian NA
Likely agency Any other Mixed / Multiple ethnic background 5 4.33 269 266.54 1.63 Any other Mixed Multiple ethnic background
Likely agency Indian 8 11.83 269 266.54 4.44 Indian NA
Likely agency Pakistani 8 9.74 269 266.54 3.65 Pakistani NA
Likely agency Bangladeshi 3 2.61 269 266.54 0.98 Bangladeshi NA
Likely agency Chinese 1 1.58 269 266.54 0.59 Chinese NA
Likely agency Any other Asian background 5 8.34 269 266.54 3.13 Any other Asian background NA
Likely agency African 22 12.82 269 266.54 4.81 African NA
Likely agency Caribbean 3 3.39 269 266.54 1.27 Caribbean NA
Likely agency Any other Black, Black British, or Caribbean background 1 1.16 269 266.54 0.44 Any other Black Black British, or Caribbean background
Likely agency Arab 2 3.42 269 266.54 1.28 Arab NA
Likely agency Any other ethnic group 1 3.93 269 266.54 1.48 Any other ethnic group NA
Likely agency Don’t think of myself as any of these 1 0.40 269 266.54 0.15 Don’t think of myself as any of these NA
Likely agency Prefer not to say 1 0.52 269 266.54 0.20 Prefer not to say NA
Likely agency NA 11 9.20 269 266.54 3.45 NA NA
High indicators English / Welsh / Scottish / Northern Irish / British 202 190.56 291 280.74 67.88 English Welsh / Scottish / Northern Irish / British
High indicators Irish 4 2.97 291 280.74 1.06 Irish NA
High indicators Any other White background 7 8.37 291 280.74 2.98 Any other White background NA
High indicators White and Black Caribbean 2 0.97 291 280.74 0.35 White and Black Caribbean NA
High indicators White and Black African 3 2.62 291 280.74 0.93 White and Black African NA
High indicators White and Asian 1 0.78 291 280.74 0.28 White and Asian NA
High indicators Any other Mixed / Multiple ethnic background 3 1.71 291 280.74 0.61 Any other Mixed Multiple ethnic background
High indicators Indian 15 18.18 291 280.74 6.48 Indian NA
High indicators Pakistani 12 11.43 291 280.74 4.07 Pakistani NA
High indicators Bangladeshi 3 2.48 291 280.74 0.88 Bangladeshi NA
High indicators Chinese 4 6.70 291 280.74 2.39 Chinese NA
High indicators Any other Asian background 4 6.10 291 280.74 2.17 Any other Asian background NA
High indicators African 15 9.93 291 280.74 3.54 African NA
High indicators Caribbean 2 1.31 291 280.74 0.47 Caribbean NA
High indicators Any other Black, Black British, or Caribbean background 2 1.16 291 280.74 0.41 Any other Black Black British, or Caribbean background
High indicators Arab 2 3.63 291 280.74 1.29 Arab NA
High indicators Any other ethnic group 1 1.60 291 280.74 0.57 Any other ethnic group NA
High indicators Prefer not to say 2 4.72 291 280.74 1.68 Prefer not to say NA
High indicators NA 7 5.50 291 280.74 1.96 NA NA
Code
data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(outsourcing_group, Ethnicity) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    n = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    N = n()
  ) %>%
  ggplot(., aes(outsourcing_group, Percentage, fill = as.factor(Ethnicity))) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$outsourcing_group, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
  coord_flip() +
  scale_fill_manual(values = many_colours, name = "Ethnicity") +
  xlab("Outsourcing group") +
  theme_minimal()

Code
ethnicity_key <- data.frame("number" = seq(1,22,1),
                            "ethnicity" = c(levels(ethnicity_statistics$Ethnicity_labelled), NA))

ethnicity_key %>%
  kable() %>%
  kable_styling(full_width = F)
number ethnicity
1 English / Welsh / Scottish / Northern Irish / British
2 Irish
3 Gypsy or Irish Traveller
4 Roma
5 Any other White background
6 White and Black Caribbean
7 White and Black African
8 White and Asian
9 Any other Mixed / Multiple ethnic background
10 Indian
11 Pakistani
12 Bangladeshi
13 Chinese
14 Any other Asian background
15 African
16 Caribbean
17 Any other Black, Black British, or Caribbean background
18 Arab
19 Any other ethnic group
20 Don’t think of myself as any of these
21 Prefer not to say
22 NA
Code
ethnicity_statistics_2 <- data %>%
  group_by(outsourcing_group, Ethnicity_collapsed) %>%
  summarise(
    n = n(), # count cases
    Frequency = sum(NatRepemployees) # count weighted cases
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) #%>%
    # separate_wider_delim(Ethnicity_short, 
    #                      names = c("Ethnicity_short", "Ethnicity detail"), 
    #                      delim = stringr::regex(" / |, "),  # use multiple delims
    #                      too_few = "align_start",
    #                      too_many = "merge")

#readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_2.csv")
Code
ethnicity_statistics_2 %>%
  # mutate(
  #   Ethnicity = haven::as_factor(Ethnicity)
  #   ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
outsourcing_group Ethnicity_collapsed n Frequency N Sum Percentage
Not outsourced White 6734 6687.91 8472 8446.64 79.18
Not outsourced White other 379 398.98 8472 8446.64 4.72
Not outsourced Black Caribbean 158 109.78 8472 8446.64 1.30
Not outsourced Black African 272 177.52 8472 8446.64 2.10
Not outsourced Mixed other 120 65.99 8472 8446.64 0.78
Not outsourced South Asian 310 387.30 8472 8446.64 4.59
Not outsourced East Asian 70 124.50 8472 8446.64 1.47
Not outsourced Other 381 448.60 8472 8446.64 5.31
Not outsourced Black other 36 25.60 8472 8446.64 0.30
Not outsourced Arab 12 20.47 8472 8446.64 0.24
Outsourced White 754 789.42 1123 1161.08 67.99
Outsourced White other 67 76.21 1123 1161.08 6.56
Outsourced Black Caribbean 18 14.27 1123 1161.08 1.23
Outsourced Black African 95 58.28 1123 1161.08 5.02
Outsourced Mixed other 24 15.64 1123 1161.08 1.35
Outsourced South Asian 76 94.60 1123 1161.08 8.15
Outsourced East Asian 7 12.75 1123 1161.08 1.10
Outsourced Other 66 85.46 1123 1161.08 7.36
Outsourced Black other 13 9.46 1123 1161.08 0.81
Outsourced Arab 3 4.97 1123 1161.08 0.43
Likely agency White 181 174.99 269 266.54 65.65
Likely agency White other 11 14.10 269 266.54 5.29
Likely agency Black Caribbean 4 4.09 269 266.54 1.53
Likely agency Black African 24 13.74 269 266.54 5.15
Likely agency Mixed other 7 6.88 269 266.54 2.58
Likely agency South Asian 19 24.18 269 266.54 9.07
Likely agency East Asian 1 1.58 269 266.54 0.59
Likely agency Other 19 22.39 269 266.54 8.40
Likely agency Black other 1 1.16 269 266.54 0.44
Likely agency Arab 2 3.42 269 266.54 1.28
High indicators White 206 193.53 291 280.74 68.93
High indicators White other 7 8.37 291 280.74 2.98
High indicators Black Caribbean 4 2.28 291 280.74 0.81
High indicators Black African 18 12.56 291 280.74 4.47
High indicators Mixed other 4 2.50 291 280.74 0.89
High indicators South Asian 30 32.08 291 280.74 11.43
High indicators East Asian 4 6.70 291 280.74 2.39
High indicators Other 14 17.93 291 280.74 6.39
High indicators Black other 2 1.16 291 280.74 0.41
High indicators Arab 2 3.63 291 280.74 1.29
Code
data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(outsourcing_group, Ethnicity_collapsed) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    n = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    N = n()
  ) %>%
  ggplot(., aes(outsourcing_group, Percentage, fill = Ethnicity_collapsed)) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$outsourcing_group, y = 75, label = paste0("n=",ethnicity_statistics$N)) +
  coord_flip() +
  scale_fill_manual(values = many_colours, name = "Ethnicity") +
  xlab("Outsourcing group") +
  theme_minimal()

Code
# mod <- glm(outsourcing_status ~ Ethnicity_collapsed, data, family="binomial", weights=NatRepemployees)
# summary(mod)
# 
# coef_table <- data.frame("estimate" = mod[["coefficients"]]) %>%
#   mutate(
#     or = round(exp(estimate),2)
#   )

Paysplit15

Code
ethnicity_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, Ethnicity_labelled) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
    separate_wider_delim(Ethnicity_short, 
                         names = c("Ethnicity_short", "Ethnicity detail"), 
                         delim = stringr::regex(" / |, "),  # use multiple delims
                         too_few = "align_start",
                         too_many = "merge")

readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_paysplit_1.csv")
Code
temp_data <-  ethnicity_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(Ethnicity_short, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Code
# ethnicity_summary_paysplit %>%
#   drop_na(income_group) %>%
#   ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_short)) +
#   facet_grid(rows=~income_group) +
#   geom_col(colour="black") +
#   #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
#   coord_flip() +
#   scale_fill_manual(values = many_colours, name = "Ethnicity") +
#   xlab("Outsourcing group") +
#   theme_minimal() +
#   theme(
#     legend.position = "bottom"
#   )
Code
ethnicity_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, Ethnicity_collapsed) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) 

#readr::write_csv(ethnicity_statistics, file = "../outputs/data/ethnicity_stats_paysplit_1.csv")
Code
temp_data <-  ethnicity_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(Ethnicity_collapsed, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Code
# ethnicity_summary_paysplit %>%
#   drop_na(income_group) %>%
#   ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_short)) +
#   facet_grid(rows=~income_group) +
#   geom_col(colour="black") +
#   #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
#   coord_flip() +
#   scale_fill_manual(values = many_colours, name = "Ethnicity") +
#   xlab("Outsourcing group") +
#   theme_minimal() +
#   theme(
#     legend.position = "bottom"
#   )
Code
mod <- glm(outsourcing_status ~ income_group*Ethnicity_collapsed, data, family="binomial")
summary(mod)

Call:
glm(formula = outsourcing_status ~ income_group * Ethnicity_collapsed, 
    family = "binomial", data = data)

Coefficients:
                                                   Estimate Std. Error z value
(Intercept)                                        -1.83224    0.04438 -41.286
income_groupLow                                     0.17246    0.06883   2.506
Ethnicity_collapsedWhite other                      0.33035    0.16107   2.051
Ethnicity_collapsedBlack Caribbean                 -0.19371    0.31031  -0.624
Ethnicity_collapsedBlack African                    1.23328    0.13921   8.859
Ethnicity_collapsedMixed other                      0.23623    0.28662   0.824
Ethnicity_collapsedSouth Asian                      0.92653    0.14299   6.480
Ethnicity_collapsedEast Asian                       0.17932    0.36655   0.489
Ethnicity_collapsedOther                            0.42737    0.18195   2.349
Ethnicity_collapsedBlack other                      1.19039    0.39319   3.027
Ethnicity_collapsedArab                             1.32142    0.51830   2.550
income_groupLow:Ethnicity_collapsedWhite other     -0.13691    0.27257  -0.502
income_groupLow:Ethnicity_collapsedBlack Caribbean  0.55421    0.45290   1.224
income_groupLow:Ethnicity_collapsedBlack African   -0.44101    0.23655  -1.864
income_groupLow:Ethnicity_collapsedMixed other      0.64585    0.41324   1.563
income_groupLow:Ethnicity_collapsedSouth Asian     -0.01517    0.23811  -0.064
income_groupLow:Ethnicity_collapsedEast Asian      -0.12898    0.73289  -0.176
income_groupLow:Ethnicity_collapsedOther            0.33341    0.27581   1.209
income_groupLow:Ethnicity_collapsedBlack other     -0.70927    0.69591  -1.019
income_groupLow:Ethnicity_collapsedArab            -0.35479    1.33094  -0.267
                                                   Pr(>|z|)    
(Intercept)                                         < 2e-16 ***
income_groupLow                                     0.01222 *  
Ethnicity_collapsedWhite other                      0.04027 *  
Ethnicity_collapsedBlack Caribbean                  0.53246    
Ethnicity_collapsedBlack African                    < 2e-16 ***
Ethnicity_collapsedMixed other                      0.40983    
Ethnicity_collapsedSouth Asian                     9.19e-11 ***
Ethnicity_collapsedEast Asian                       0.62469    
Ethnicity_collapsedOther                            0.01883 *  
Ethnicity_collapsedBlack other                      0.00247 ** 
Ethnicity_collapsedArab                             0.01079 *  
income_groupLow:Ethnicity_collapsedWhite other      0.61548    
income_groupLow:Ethnicity_collapsedBlack Caribbean  0.22107    
income_groupLow:Ethnicity_collapsedBlack African    0.06228 .  
income_groupLow:Ethnicity_collapsedMixed other      0.11808    
income_groupLow:Ethnicity_collapsedSouth Asian      0.94921    
income_groupLow:Ethnicity_collapsedEast Asian       0.86030    
income_groupLow:Ethnicity_collapsedOther            0.22673    
income_groupLow:Ethnicity_collapsedBlack other      0.30811    
income_groupLow:Ethnicity_collapsedArab             0.78980    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8127.6  on 8942  degrees of freedom
Residual deviance: 7956.9  on 8923  degrees of freedom
  (1212 observations deleted due to missingness)
AIC: 7996.9

Number of Fisher Scoring iterations: 4
Code
test_data <- data %>%
  drop_na(income_group) %>%
  mutate(
    income_group = factor(income_group, levels = c("Not low", "Low")),
    Ethnicity_short = Ethnicity_labelled
  ) %>%
  separate_wider_delim(Ethnicity_short, 
                       names = c("Ethnicity_short", "Ethnicity detail"), 
                       delim = " / ", 
                       too_few = "align_start",
                       too_many = "merge") %>%
  mutate(
    Ethnicity_short = forcats::fct_relevel(factor(Ethnicity_short), "English")
  )


test <- glm(outsourcing_status ~ Age + Gender + income_group + Ethnicity_short, family = "binomial", data = test_data)
summary(test)

Call:
glm(formula = outsourcing_status ~ Age + Gender + income_group + 
    Ethnicity_short, family = "binomial", data = test_data)

Coefficients:
                                                                        Estimate
(Intercept)                                                            -1.122623
Age                                                                    -0.024661
GenderMale                                                              0.531731
GenderOther                                                             0.128987
GenderPrefer not to say                                                -0.240452
income_groupLow                                                         0.276827
Ethnicity_shortAfrican                                                  0.750804
Ethnicity_shortAny other Asian background                               0.691922
Ethnicity_shortAny other Black, Black British, or Caribbean background  0.752002
Ethnicity_shortAny other ethnic group                                   0.220800
Ethnicity_shortAny other Mixed                                          0.673451
Ethnicity_shortAny other White background                               0.171750
Ethnicity_shortArab                                                     0.989803
Ethnicity_shortBangladeshi                                              0.703248
Ethnicity_shortCaribbean                                                0.183752
Ethnicity_shortChinese                                                  0.021374
Ethnicity_shortDon’t think of myself as any of these                    1.449163
Ethnicity_shortGypsy or Irish Traveller                                 0.282974
Ethnicity_shortIndian                                                   0.548848
Ethnicity_shortIrish                                                   -0.126226
Ethnicity_shortPakistani                                                0.864916
Ethnicity_shortPrefer not to say                                       -0.006321
Ethnicity_shortRoma                                                     1.082635
Ethnicity_shortWhite and Asian                                         -0.090237
Ethnicity_shortWhite and Black African                                  1.155726
Ethnicity_shortWhite and Black Caribbean                               -0.279236
                                                                       Std. Error
(Intercept)                                                              0.111181
Age                                                                      0.002392
GenderMale                                                               0.061348
GenderOther                                                              0.780590
GenderPrefer not to say                                                  1.056849
income_groupLow                                                          0.061893
Ethnicity_shortAfrican                                                   0.125846
Ethnicity_shortAny other Asian background                                0.258347
Ethnicity_shortAny other Black, Black British, or Caribbean background   0.330047
Ethnicity_shortAny other ethnic group                                    0.647274
Ethnicity_shortAny other Mixed                                           0.264347
Ethnicity_shortAny other White background                                0.135112
Ethnicity_shortArab                                                      0.483591
Ethnicity_shortBangladeshi                                               0.286337
Ethnicity_shortCaribbean                                                 0.300048
Ethnicity_shortChinese                                                   0.319975
Ethnicity_shortDon’t think of myself as any of these                     0.659571
Ethnicity_shortGypsy or Irish Traveller                                  0.843433
Ethnicity_shortIndian                                                    0.163023
Ethnicity_shortIrish                                                     0.292763
Ethnicity_shortPakistani                                                 0.188944
Ethnicity_shortPrefer not to say                                         0.633334
Ethnicity_shortRoma                                                      0.769140
Ethnicity_shortWhite and Asian                                           0.335327
Ethnicity_shortWhite and Black African                                   0.262938
Ethnicity_shortWhite and Black Caribbean                                 0.345023
                                                                       z value
(Intercept)                                                            -10.097
Age                                                                    -10.311
GenderMale                                                               8.667
GenderOther                                                              0.165
GenderPrefer not to say                                                 -0.228
income_groupLow                                                          4.473
Ethnicity_shortAfrican                                                   5.966
Ethnicity_shortAny other Asian background                                2.678
Ethnicity_shortAny other Black, Black British, or Caribbean background   2.278
Ethnicity_shortAny other ethnic group                                    0.341
Ethnicity_shortAny other Mixed                                           2.548
Ethnicity_shortAny other White background                                1.271
Ethnicity_shortArab                                                      2.047
Ethnicity_shortBangladeshi                                               2.456
Ethnicity_shortCaribbean                                                 0.612
Ethnicity_shortChinese                                                   0.067
Ethnicity_shortDon’t think of myself as any of these                     2.197
Ethnicity_shortGypsy or Irish Traveller                                  0.336
Ethnicity_shortIndian                                                    3.367
Ethnicity_shortIrish                                                    -0.431
Ethnicity_shortPakistani                                                 4.578
Ethnicity_shortPrefer not to say                                        -0.010
Ethnicity_shortRoma                                                      1.408
Ethnicity_shortWhite and Asian                                          -0.269
Ethnicity_shortWhite and Black African                                   4.395
Ethnicity_shortWhite and Black Caribbean                                -0.809
                                                                       Pr(>|z|)
(Intercept)                                                             < 2e-16
Age                                                                     < 2e-16
GenderMale                                                              < 2e-16
GenderOther                                                            0.868753
GenderPrefer not to say                                                0.820021
income_groupLow                                                        7.73e-06
Ethnicity_shortAfrican                                                 2.43e-09
Ethnicity_shortAny other Asian background                              0.007400
Ethnicity_shortAny other Black, Black British, or Caribbean background 0.022698
Ethnicity_shortAny other ethnic group                                  0.733011
Ethnicity_shortAny other Mixed                                         0.010847
Ethnicity_shortAny other White background                              0.203669
Ethnicity_shortArab                                                    0.040680
Ethnicity_shortBangladeshi                                             0.014049
Ethnicity_shortCaribbean                                               0.540267
Ethnicity_shortChinese                                                 0.946741
Ethnicity_shortDon’t think of myself as any of these                   0.028011
Ethnicity_shortGypsy or Irish Traveller                                0.737246
Ethnicity_shortIndian                                                  0.000761
Ethnicity_shortIrish                                                   0.666357
Ethnicity_shortPakistani                                               4.70e-06
Ethnicity_shortPrefer not to say                                       0.992037
Ethnicity_shortRoma                                                    0.159252
Ethnicity_shortWhite and Asian                                         0.787852
Ethnicity_shortWhite and Black African                                 1.11e-05
Ethnicity_shortWhite and Black Caribbean                               0.418329
                                                                          
(Intercept)                                                            ***
Age                                                                    ***
GenderMale                                                             ***
GenderOther                                                               
GenderPrefer not to say                                                   
income_groupLow                                                        ***
Ethnicity_shortAfrican                                                 ***
Ethnicity_shortAny other Asian background                              ** 
Ethnicity_shortAny other Black, Black British, or Caribbean background *  
Ethnicity_shortAny other ethnic group                                     
Ethnicity_shortAny other Mixed                                         *  
Ethnicity_shortAny other White background                                 
Ethnicity_shortArab                                                    *  
Ethnicity_shortBangladeshi                                             *  
Ethnicity_shortCaribbean                                                  
Ethnicity_shortChinese                                                    
Ethnicity_shortDon’t think of myself as any of these                   *  
Ethnicity_shortGypsy or Irish Traveller                                   
Ethnicity_shortIndian                                                  ***
Ethnicity_shortIrish                                                      
Ethnicity_shortPakistani                                               ***
Ethnicity_shortPrefer not to say                                          
Ethnicity_shortRoma                                                       
Ethnicity_shortWhite and Asian                                            
Ethnicity_shortWhite and Black African                                 ***
Ethnicity_shortWhite and Black Caribbean                                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 7918.6  on 8739  degrees of freedom
Residual deviance: 7567.2  on 8714  degrees of freedom
  (203 observations deleted due to missingness)
AIC: 7619.2

Number of Fisher Scoring iterations: 4

Another way of looking at this is to calculate, for each ethnicity, the proportion of workers in each outsourcing group. Doing so yields the plot below.16

Code
ethnicity_statistics <- data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(Ethnicity, outsourcing_status) %>%
  summarise(
    Frequency = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) %>%
  rename(
    `Outsourcing group` = outsourcing_status
  )

ethnicity_statistics %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity)
    ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
Ethnicity Outsourcing group Frequency Sum Percentage
English / Welsh / Scottish / Northern Irish / British Not outsourced 6623 7747 85.49
English / Welsh / Scottish / Northern Irish / British Outsourced 1124 7747 14.51
Irish Not outsourced 111 128 86.72
Irish Outsourced 17 128 13.28
Gypsy or Irish Traveller Not outsourced 6 8 75.00
Gypsy or Irish Traveller Outsourced 2 8 25.00
Roma Not outsourced 4 7 57.14
Roma Outsourced 3 7 42.86
Any other White background Not outsourced 369 449 82.18
Any other White background Outsourced 80 449 17.82
White and Black Caribbean Not outsourced 84 95 88.42
White and Black Caribbean Outsourced 11 95 11.58
White and Black African Not outsourced 40 66 60.61
White and Black African Outsourced 26 66 39.39
White and Asian Not outsourced 61 73 83.56
White and Asian Outsourced 12 73 16.44
Any other Mixed / Multiple ethnic background Not outsourced 59 82 71.95
Any other Mixed / Multiple ethnic background Outsourced 23 82 28.05
Indian Not outsourced 170 225 75.56
Indian Outsourced 55 225 24.44
Pakistani Not outsourced 93 142 65.49
Pakistani Outsourced 49 142 34.51
Bangladeshi Not outsourced 47 68 69.12
Bangladeshi Outsourced 21 68 30.88
Chinese Not outsourced 70 82 85.37
Chinese Outsourced 12 82 14.63
Any other Asian background Not outsourced 57 83 68.67
Any other Asian background Outsourced 26 83 31.33
African Not outsourced 232 343 67.64
African Outsourced 111 343 32.36
Caribbean Not outsourced 74 89 83.15
Caribbean Outsourced 15 89 16.85
Any other Black, Black British, or Caribbean background Not outsourced 36 52 69.23
Any other Black, Black British, or Caribbean background Outsourced 16 52 30.77
Arab Not outsourced 12 19 63.16
Arab Outsourced 7 19 36.84
Any other ethnic group Not outsourced 13 16 81.25
Any other ethnic group Outsourced 3 16 18.75
Don’t think of myself as any of these Not outsourced 7 12 58.33
Don’t think of myself as any of these Outsourced 5 12 41.67
Prefer not to say Not outsourced 22 26 84.62
Prefer not to say Outsourced 4 26 15.38
NA Not outsourced 282 343 82.22
NA Outsourced 61 343 17.78
Code
# ordering_df <- ethnicity_statistics %>%
#   filter(`Outsourcing group` == "Outsourced") %>%
#   mutate(
#     Ethnicity = haven::as_factor(Ethnicity),
#     Ethnicity = factor(Ethnicity),
#     Ethnicity = forcats::fct_reorder(Ethnicity, Percentage)
#   )

ethnicity_statistics %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity)
    ) %>%
  ggplot(., aes(Ethnicity, Percentage, fill = `Outsourcing group`)) +
  geom_col(colour="black") +
  annotate("text", x = ethnicity_statistics$Ethnicity, y = 75, label = paste0("n=", ethnicity_statistics$Sum)) +
  coord_flip() +
  scale_fill_manual(values=many_colours, name = "Ethnicity")

Code
write_csv(ethnicity_statistics, file="../outputs/data/ethnicity_stats_2.csv")

Arrival in the UK

Code
bornuk_statistics <- data %>%
  # get values of labels
  mutate_all(haven::as_factor) %>%
  group_by(outsourcing_status, BORNUK) %>%
  summarise(
    Frequency = n()
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  )

readr::write_csv(bornuk_statistics, file="../outputs/data/arrival_in_UK_stats.csv")
Code
categories <- as.vector(unique(haven::as_factor(data$BORNUK)))
non_categories <- categories[!(categories %in% "I was born in the UK")]

# Will throw NA warning. I think this OK but investigate how to avoid the problem
summary_table <- data %>%
  mutate(
    BORNUK = haven::as_factor(BORNUK)
  ) %>%
  mutate(
    BORNUK = forcats::fct_collapse(as.character(BORNUK),
                                      "Born in UK" = "I was born in the UK",
                                      "Not born in UK" = non_categories)
  ) %>%
  group_by(outsourcing_status, BORNUK) %>%
  summarise(
    n = n()
  ) %>%
  mutate(
    Sum = sum(n),
    Percentage = 100 * (n / Sum)
  )

domain <- "BORNUK"
category_1 <- "Born in UK"
category_2 <- "Not born in UK"
  
group_1 <- t(tibble("present"=summary_table[which(summary_table[domain]==category_1 & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"],
"not present" = summary_table[which(summary_table[domain]==category_2 & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"]
))

group_2 <- t(tibble("present"=summary_table[which(summary_table[domain]==category_1 & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"],
"not present" = summary_table[which(summary_table[domain]==category_2 & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"]
))


comp_mat <- as.matrix(cbind(group_2, group_1)) # matrix for crosstable
x2 <- gmodels::CrossTable(comp_mat, fisher=TRUE, chisq = TRUE)
 # (chi-square = `r round(x2[["chisq"]][["statistic"]][["X-squared"]],2)`, *p* = `r ifelse(x2[["chisq"]][["p.value"]] < .001, "< .001", paste0("= ", round(``x2[["chisq"]][["p.value"]],2))`).

A greater proportion of outsourced workers were not born in the UK (24.06%) compared to non-outsourced workers (13.6%).17 This difference is statistically significant; outsourced workers are 2.01 times more likely to have been born outside the UK than non-outsourced workers.

Looking at the figure below, it appears that no particular arrival time is especially common amongst the outsourced group, with a relatively equal distribution across arrival times (though potentially a slightly larger proportion fall into the ‘Within the last 10 years category’). The is broadly the case for the likely agency and high indicators groups too, though note that amongst likely agency there is a slightly larger proportion of workers who have arrived within the last year.

Code
bornuk_statistics %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
outsourcing_status BORNUK Frequency Sum Percentage
Not outsourced I was born in the UK 7320 8472 86.40
Not outsourced Within the last year 115 8472 1.36
Not outsourced Within the last 3 years 135 8472 1.59
Not outsourced Within the last 5 years 116 8472 1.37
Not outsourced Within the last 10 years 183 8472 2.16
Not outsourced Within the last 15 years 137 8472 1.62
Not outsourced Within the last 20 years 127 8472 1.50
Not outsourced Within the last 30 years 103 8472 1.22
Not outsourced More than 30 years ago 149 8472 1.76
Not outsourced Prefer not to say 87 8472 1.03
Outsourced I was born in the UK 1278 1683 75.94
Outsourced Within the last year 65 1683 3.86
Outsourced Within the last 3 years 45 1683 2.67
Outsourced Within the last 5 years 40 1683 2.38
Outsourced Within the last 10 years 71 1683 4.22
Outsourced Within the last 15 years 53 1683 3.15
Outsourced Within the last 20 years 44 1683 2.61
Outsourced Within the last 30 years 14 1683 0.83
Outsourced More than 30 years ago 39 1683 2.32
Outsourced Prefer not to say 34 1683 2.02
Code
bornuk_statistics %>%
  ggplot(., aes(outsourcing_status, Percentage, fill = BORNUK)) +
  geom_col(colour="black") +
  annotate("text", x = bornuk_statistics$outsourcing_status, y = 75, label = paste0("n=",bornuk_statistics$Sum)) +
  coord_flip() +
  scale_fill_manual(values=many_colours, name="Arrival in UK") +
  theme_minimal() +
  xlab("Outsourcing group") 

Paysplit

Code
bornuk_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, BORNUK_labelled) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  )

readr::write_csv(bornuk_summary_paysplit, file = "../outputs/data/bornuk_stats_paysplit_1.csv")
Code
mod <- glm(outsourcing_status ~ income_group * BORNUK_labelled, data, weights = NatRepemployees, family="binomial")
summary(mod)

Call:
glm(formula = outsourcing_status ~ income_group * BORNUK_labelled, 
    family = "binomial", data = data, weights = NatRepemployees)

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                             -1.80357    0.04344
income_groupLow                                          0.22862    0.06439
BORNUK_labelledWithin the last year                      1.28978    0.25307
BORNUK_labelledWithin the last 3 years                   0.68932    0.23063
BORNUK_labelledWithin the last 5 years                   0.54565    0.24814
BORNUK_labelledWithin the last 10 years                  0.74566    0.18405
BORNUK_labelledWithin the last 15 years                  1.05016    0.18554
BORNUK_labelledWithin the last 20 years                  0.99745    0.20348
BORNUK_labelledWithin the last 30 years                 -1.33247    0.53396
BORNUK_labelledMore than 30 years ago                    0.43879    0.23670
BORNUK_labelledPrefer not to say                         0.84084    0.44469
income_groupLow:BORNUK_labelledWithin the last year     -0.38239    0.35255
income_groupLow:BORNUK_labelledWithin the last 3 years  -0.41813    0.39148
income_groupLow:BORNUK_labelledWithin the last 5 years   0.12118    0.42110
income_groupLow:BORNUK_labelledWithin the last 10 years  0.24893    0.28633
income_groupLow:BORNUK_labelledWithin the last 15 years -0.95025    0.38000
income_groupLow:BORNUK_labelledWithin the last 20 years -0.84991    0.43888
income_groupLow:BORNUK_labelledWithin the last 30 years  1.75447    0.69940
income_groupLow:BORNUK_labelledMore than 30 years ago    0.54466    0.35281
income_groupLow:BORNUK_labelledPrefer not to say        -0.12256    0.63991
                                                        z value Pr(>|z|)    
(Intercept)                                             -41.516  < 2e-16 ***
income_groupLow                                           3.551 0.000384 ***
BORNUK_labelledWithin the last year                       5.096 3.46e-07 ***
BORNUK_labelledWithin the last 3 years                    2.989 0.002800 ** 
BORNUK_labelledWithin the last 5 years                    2.199 0.027883 *  
BORNUK_labelledWithin the last 10 years                   4.051 5.09e-05 ***
BORNUK_labelledWithin the last 15 years                   5.660 1.51e-08 ***
BORNUK_labelledWithin the last 20 years                   4.902 9.49e-07 ***
BORNUK_labelledWithin the last 30 years                  -2.495 0.012579 *  
BORNUK_labelledMore than 30 years ago                     1.854 0.063776 .  
BORNUK_labelledPrefer not to say                          1.891 0.058647 .  
income_groupLow:BORNUK_labelledWithin the last year      -1.085 0.278074    
income_groupLow:BORNUK_labelledWithin the last 3 years   -1.068 0.285484    
income_groupLow:BORNUK_labelledWithin the last 5 years    0.288 0.773525    
income_groupLow:BORNUK_labelledWithin the last 10 years   0.869 0.384622    
income_groupLow:BORNUK_labelledWithin the last 15 years  -2.501 0.012396 *  
income_groupLow:BORNUK_labelledWithin the last 20 years  -1.937 0.052802 .  
income_groupLow:BORNUK_labelledWithin the last 30 years   2.509 0.012123 *  
income_groupLow:BORNUK_labelledMore than 30 years ago     1.544 0.122644    
income_groupLow:BORNUK_labelledPrefer not to say         -0.192 0.848114    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8150.5  on 8942  degrees of freedom
Residual deviance: 7991.6  on 8923  degrees of freedom
  (1212 observations deleted due to missingness)
AIC: 8937.4

Number of Fisher Scoring iterations: 5
Code
# To me this indicates that htere is main effect - arrival time pre
sjPlot::plot_model(mod, type = "int", legend.title = "")

Results of a glm suggest that any arrival time positively predicts outsroucing status, apart from ‘within the last 15 years’ and ‘within the last 30 years’. Takeaway is that people having migrated in the past 20 years are more likely to be outsourced than people born in the uk. People having migrated in the past 15 years are less likely to be outsourced if they’re in the low income group, whilst people having migrated in the past 30 years are more likely to be outsourced if they’re in the low income group. I would take caution in interpreting these interaction results in isolation as they may be influence by other factors (e.g., ethnicity).

Note

We should test this with a more complex model that includes covariates

The plot below shows the percentage of outrouced and non-outsourced people by income group and arrival time.

Code
temp_data <-  bornuk_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(BORNUK_labelled, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Code
# ethnicity_summary_paysplit %>%
#   drop_na(income_group) %>%
#   ggplot(., aes(outsourcing_status, Percentage, fill = Ethnicity_short)) +
#   facet_grid(rows=~income_group) +
#   geom_col(colour="black") +
#   #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
#   coord_flip() +
#   scale_fill_manual(values = many_colours, name = "Ethnicity") +
#   xlab("Outsourcing group") +
#   theme_minimal() +
#   theme(
#     legend.position = "bottom"
#   )

Interaction: Ethnicity and arrival in UK18

Code
temp_df <- data %>%
  # filter(outsourcing_status=="Outsourced") %>%
  mutate(
    Ethnicity = haven::as_factor(Ethnicity),
    BORNUK = haven::as_factor(BORNUK)
  )

# mytable <- table(temp_df$BORNUK,temp_df$Ethnicity)
# tab <- as.data.frame(prop.table(mytable))

# int_summary <- temp_df %>%
#   group_by(BORNUK, Ethnicity) %>%
#   summarise(
#     Frequency = sum(NatRepemployees)
#   ) %>%
#   mutate(
#     Percentage = 100 * (Frequency/sum(Frequency))
#   )
# 
# int_summary %>%
#   ggplot(., aes(BORNUK, Percentage, fill = Ethnicity)) +
#   geom_col() +
#   coord_flip() +
#   theme(legend.position = "none") +
#   scale_fill_manual(values = many_colours)
# 
# int_summary %>%
#   ggplot(., aes(BORNUK, Percentage, fill = Ethnicity)) +
#   geom_col() +
#   coord_flip() +
#   scale_fill_manual(values = many_colours)
#   # theme(legend.position = "none")


int_summary_2 <- temp_df %>%
  group_by(outsourcing_status, Ethnicity, BORNUK) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Percentage = 100 * (Frequency/sum(Frequency))
  )

int_summary_2 %>%
  ggplot(., aes(Ethnicity, Percentage, fill = BORNUK)) +
  facet_grid(rows=vars(outsourcing_status)) +
  geom_col() +
  coord_flip() +
  theme(legend.position = "none") +
  scale_fill_manual(values = many_colours)

Code
int_summary_2 %>%
  ggplot(., aes(Ethnicity, Percentage, fill = BORNUK)) +
    facet_grid(rows=vars(outsourcing_status)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = many_colours)

Code
  # theme(legend.position = "none")

readr::write_csv(int_summary_2, file="../outputs/data/interaction_ethnicity_arrival_in_UK_stats.csv")

For the following ethnicites, more than 50% were born in the UK:

  • NA
  • Prefer not to say
  • Any other Black, Black British, or Caribbean background
  • Caribbean
  • Pakistani
  • Any other Mixed/Multiple ethnic background
  • White and Asian
  • White and Black Caribbean
  • Gypsy or Irish Traveller
  • Irish
  • English/Welsh/Scottish/Northern Irish/British

For the following ethnicities around 50% were born in the UK, with remaining 50% split across arrival times:

  • Don’t think of myself as any of these (second largest proportion within the last 15 years)
  • Any other ethnic group (2nd largest proportion wihtin the last 10 years)
  • Bangladeshi (2nd largest proprotion prefer not to say)
  • Indian (2nd largest proportion wthin the last 3 years)
  • White and Black African (2nd largest proportion within the last 3 years)

For the following ethnicities, less than 50% were born in the UK:

  • Arab
  • African (largest proportion within the last year)
  • Any other Asian background (2nd an 3rd largest proportions are within the last year and within the last 15 years)
  • Chinese (2nd and 3rd largest proportions are within the last 3 years, wihtin the last 15 years)
  • Any other White background (largest proportion within the last 10 years)
  • Roma (largest proportion within the last 5 years)
Code
  # filter(outsourcing_status=="Outsourced") %>%
  # mutate(
  #   Ethnicity = haven::as_factor(Ethnicity),
  #   BORNUK = haven::as_factor(BORNUK)
  # )

# mytable <- table(temp_df$BORNUK,temp_df$Ethnicity)
# tab <- as.data.frame(prop.table(mytable))

# int_summary <- temp_df %>%
#   group_by(BORNUK, Ethnicity) %>%
#   summarise(
#     Frequency = sum(NatRepemployees)
#   ) %>%
#   mutate(
#     Percentage = 100 * (Frequency/sum(Frequency))
#   )
# 
# int_summary %>%
#   ggplot(., aes(BORNUK, Percentage, fill = Ethnicity)) +
#   geom_col() +
#   coord_flip() +
#   theme(legend.position = "none") +
#   scale_fill_manual(values = many_colours)
# 
# int_summary %>%
#   ggplot(., aes(BORNUK, Percentage, fill = Ethnicity)) +
#   geom_col() +
#   coord_flip() +
#   scale_fill_manual(values = many_colours)
#   # theme(legend.position = "none")


int_summary_2 <- data %>%
  group_by(outsourcing_status, Ethnicity_collapsed, BORNUK_labelled) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Percentage = 100 * (Frequency/sum(Frequency))
  )

# int_summary_2 %>%
#   ggplot(., aes(Ethnicity_collapsed, Percentage, fill = BORNUK_labelled)) +
#   facet_grid(rows=vars(outsourcing_status)) +
#   geom_col() +
#   coord_flip() +
#   theme(legend.position = "none") +
#   scale_fill_manual(values = many_colours)

int_summary_2 %>%
  ggplot(., aes(Ethnicity_collapsed, Percentage, fill = BORNUK_labelled)) +
    facet_grid(rows=vars(outsourcing_status)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = many_colours)

Code
  # theme(legend.position = "none")
Code
data <- data %>%
  mutate(
    BORNUK_collapsed = forcats::fct_collapse(BORNUK_labelled,
                                            "Born in UK" = "I was born in the UK",
                                            "Came to UK recently" = c("Within the last year",
                                                                       "Within the last 3 years",
                                                                       "Within the last 5 years",
                                                                       "Within the last 10 years"),
                                            "Came to UK not recently" =  c("Within the last 15 years",
                                                                           "Within the last 20 years",
                                                                           "Within the last 30 years",
                                                                           "More than 30 years ago"),
                                            "Prefer not to say" = c("Prefer not to say")
    )
  )

int_summary_3 <- data %>%
  group_by(outsourcing_status, Ethnicity_collapsed, BORNUK_collapsed) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Percentage = 100 * (Frequency/sum(Frequency))
  )

int_summary_3 %>%
  ggplot(., aes(Ethnicity_collapsed, Percentage, fill = BORNUK_collapsed)) +
    facet_grid(rows=vars(outsourcing_status)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = many_colours)

Code
# mod <- glm(outsourcing_status ~ Ethnicity_collapsed*BORNUK_collapsed, data, family="binomial", weight = NatRepemployees)
# summary(mod)
# emmeans(mod, specs = "Ethnicity_collapsed", by ="BORNUK_collapsed")
# sjPlot::plot_model(mod, type = "int", legend.title = "", terms = c("outsourcing_status","BORNUK_collapsed","Ethnicity_collapsed ['Black African]"))
#   

Gender19

Code
gender_statistics <- data %>%
  # get values of labels
  mutate_all(haven::as_factor) %>%
  group_by(outsourcing_status, Gender) %>%
  summarise(
    Frequency = n()
  ) %>%
  mutate(
    Percentage = 100 * (Frequency / sum(Frequency))
  )

readr::write_csv(gender_statistics, file="../outputs/data/gender_statistics.csv")
Code
gender_statistics %>%
  ggplot(., aes(outsourcing_status, Percentage, fill = Gender)) +
  geom_col(colour="black") +
  # annotate("text", x = gender_statistics$outsourcing_status, y = 75, label = paste0("n=", gender_statistics$Frequency)) +
  coord_flip() +
  scale_fill_manual(values=colours) +
  theme_minimal() +
  xlab("Outsourcing group") 

Code
categories <- as.vector(unique(haven::as_factor(data$Gender)))
non_categories <- categories[!(categories %in% "Male")]

# Will throw NA warning. I think this OK but investigate how to avoid the problem
summary_table <- data %>%
  mutate(
    Gender = haven::as_factor(Gender)
  ) %>%
  mutate(
    Gender = forcats::fct_collapse(as.character(Gender),
                                      "Male" = "Male",
                                      "Not male" = non_categories)
  ) %>%
  group_by(outsourcing_status, Gender) %>%
  summarise(
    n = n()
  ) %>%
  mutate(
    Sum = sum(n),
    Percentage = 100 * (n / Sum)
  )

domain <- "Gender"
category_1 <- "Male"
category_2 <- "Not male"
  
group_1 <- t(tibble("present"=summary_table[which(summary_table[domain]==category_1 & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"],
"not present" = summary_table[which(summary_table[domain]==category_2 & 
                                 summary_table["outsourcing_status"]=="Not outsourced"),"n"]
))

group_2 <- t(tibble("present"=summary_table[which(summary_table[domain]==category_1 & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"],
"not present" = summary_table[which(summary_table[domain]==category_2 & 
                                 summary_table["outsourcing_status"]=="Outsourced"),"n"]
))


comp_mat <- as.matrix(cbind(group_2, group_1)) # matrix for crosstable
x2 <- gmodels::CrossTable(comp_mat, fisher=TRUE, chisq = TRUE)

In terms of Gender, the outsourced group has a larger proportion of males (57.81% compared to 46.4%). This difference is statistically significant; outsourced workers are 1.58 times more likely to have be male than non-outsourced workers.

Paysplit

Code
gender_summary_paysplit <- data %>%
  group_by(outsourcing_status, income_group, Gender) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  )

readr::write_csv(ethnicity_statistics, file = "../outputs/data/gender_stats_paysplit_1.csv")
Code
mod <- glm(outsourcing_status ~ Gender * income_group, data, family="binomial", weights = NatRepemployees)
summary(mod)

Call:
glm(formula = outsourcing_status ~ Gender * income_group, family = "binomial", 
    data = data, weights = NatRepemployees)

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)
(Intercept)                             -2.01977    0.06791 -29.741  < 2e-16
GenderMale                               0.56260    0.08168   6.888 5.67e-12
GenderOther                              0.44378    0.96305   0.461   0.6449
GenderPrefer not to say                  0.65109    0.69214   0.941   0.3469
income_groupLow                          0.43123    0.08751   4.928 8.33e-07
GenderMale:income_groupLow              -0.24070    0.11969  -2.011   0.0443
GenderOther:income_groupLow             -0.10606    1.38352  -0.077   0.9389
GenderPrefer not to say:income_groupLow -0.65321    1.10050  -0.594   0.5528
                                           
(Intercept)                             ***
GenderMale                              ***
GenderOther                                
GenderPrefer not to say                    
income_groupLow                         ***
GenderMale:income_groupLow              *  
GenderOther:income_groupLow                
GenderPrefer not to say:income_groupLow    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8150.5  on 8942  degrees of freedom
Residual deviance: 8076.3  on 8935  degrees of freedom
  (1212 observations deleted due to missingness)
AIC: 9001.4

Number of Fisher Scoring iterations: 4
Code
sjPlot::plot_model(mod, type = "int")

A glm finds that income group is a significant factor for females but not for males. Whilst males are more likely than females overall to be outsourced, females are significantly more likely to be outsourced if they are in the low income group than if they are in the not-low income group. The plots below show the percentage of outsourced workers by income group and gender.

Code
temp_data <-  gender_summary_paysplit %>%
  drop_na(income_group)
  
for(group in unique(temp_data$income_group)){
  plot_data <- temp_data %>%
    filter(income_group==group)

  plot <- plot_data %>%
    ggplot(., aes(Gender, Percentage, fill = outsourcing_status)) +
      geom_col(colour="black", position = position_dodge()) +
      #annotate("text", x = ethnicity_statistics$outsourcing_status, y = 75, label = paste0("n=",ethnicity_statistics$Sum)) +
      coord_flip() +
      scale_fill_manual(values = many_colours, name = "Ethnicity") +
      xlab("Outsourcing group") +
      theme_minimal() +
    theme(
      legend.position = "bottom"
    ) +
    ggtitle(paste0(group, " income"))
  
  
  print(plot)
}

Region20

Let’s cross check the size of the employed workforce across regions, and compare this to how many people are in each region in our sample. The percentages should work out the same if they’re weighted.

The tables below show that our sample is weighted by region. The weighted percentage of our sampled workers in each region matches the percentages from the ONS employment by region tables. This means that the weighted percentage of workers (and therefore outsourced workers) in our sample can be considered to be representative of the national picture.

Code
our_sample <- data %>%
  group_by(Region) %>%
  summarise(
    n = n(), 
    sample_sum = sum(NatRepemployees)
  ) %>%
  mutate(
    perc = 100 * (n / sum(n)),
    sample_wtd_perc = 100 * (sample_sum/sum(sample_sum))
  ) %>% 
  arrange(desc(sample_wtd_perc))

denoms <- rgn_empl_denoms %>%
  mutate(
    employment_data_perc = 100 * (Employed / sum(Employed))
  ) %>% 
  select(-Weight) %>%
  arrange(desc(employment_data_perc))

combined <- merge(denoms,our_sample, by="Region") %>%
  select(c(Region, Employed, sample_sum, employment_data_perc, sample_wtd_perc)) %>%
  arrange(desc(sample_wtd_perc))

# our_sample %>% 
#   kable() %>%
#   kable_styling()
# 
# denoms %>%
#   kable() %>%
#   kable_styling()

combined %>%
  kable() %>%
  kable_styling()
Region Employed sample_sum employment_data_perc sample_wtd_perc
London 4829298851 1431.0519 14.557314 14.092092
South East 4753437758 1420.8114 14.328640 13.991250
North West 3583507881 1095.8237 10.802034 10.790977
West Midlands 2882955040 879.5148 8.690305 8.660903
Scotland 2650815533 853.0980 7.990550 8.400768
South West 2802120873 837.8587 8.446641 8.250701
East Midlands 2359055655 728.1845 7.111077 7.170699
Wales 1436559071 461.0781 4.330327 4.540405
North East 1215820684 386.9397 3.664938 3.810337
Northern Ireland 873773187 275.2233 2.633879 2.710224

The plot below shows the distribution of outsourced and non outsourced workers across regions. It suggests that an outsourced worker is more likely to be based in London than a non-outsourced worker.

Code
region_statistics <- data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(outsourcing_status, Region) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) %>%
  rename(
    `Outsourcing status` = outsourcing_status
  )

region_statistics %>%
  mutate(
    Region = haven::as_factor(Region)
    ) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)
Outsourcing status Region Frequency Sum Percentage
Not outsourced East Midlands 587.69 8446.64 6.96
Not outsourced East of England 841.35 8446.64 9.96
Not outsourced London 1073.70 8446.64 12.71
Not outsourced North East 333.88 8446.64 3.95
Not outsourced North West 906.43 8446.64 10.73
Not outsourced Northern Ireland 231.66 8446.64 2.74
Not outsourced Scotland 727.28 8446.64 8.61
Not outsourced South East 1232.34 8446.64 14.59
Not outsourced South West 717.36 8446.64 8.49
Not outsourced Wales 377.83 8446.64 4.47
Not outsourced West Midlands 718.03 8446.64 8.50
Not outsourced Yorkshire and the Humber 699.11 8446.64 8.28
Outsourced East Midlands 140.50 1708.36 8.22
Outsourced East of England 125.49 1708.36 7.35
Outsourced London 357.35 1708.36 20.92
Outsourced North East 53.06 1708.36 3.11
Outsourced North West 189.39 1708.36 11.09
Outsourced Northern Ireland 43.56 1708.36 2.55
Outsourced Scotland 125.82 1708.36 7.37
Outsourced South East 188.47 1708.36 11.03
Outsourced South West 120.50 1708.36 7.05
Outsourced Wales 83.25 1708.36 4.87
Outsourced West Midlands 161.49 1708.36 9.45
Outsourced Yorkshire and the Humber 119.46 1708.36 6.99
Code
region_statistics %>%
  mutate(
    Region = haven::as_factor(Region)
    ) %>%
  ggplot(., aes(`Outsourcing status`, Percentage, fill = Region)) +
  geom_col(colour="black") +
  coord_flip() +
  scale_fill_manual(values=many_colours) +
  theme_minimal()

Code
readr::write_csv(region_statistics, file="../outputs/data/region_statistics.csv")

In the plot below the percentages have been scaled to the size of the working population in the region as a function of the total working population in the UK. I need to check whether this scaling is actually necessary, given we are already using weighted data.21 Does the weighting process account for region?

Below we calculate the number of outsourced workers within each region.

Code
region_statistics_2 <- data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(Region, outsourcing_status) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) %>%
  rename(
    `Outsourcing status` = outsourcing_status
  )

region_statistics_2 %>%
  ggplot(., aes(Region, Percentage, fill = `Outsourcing status`)) +
  geom_col(colour="black") +
  coord_flip() +
  scale_fill_manual(values=many_colours) +
  theme_minimal()

Code
readr::write_csv(region_statistics_2, file = "../outputs/data/region_stats_2.csv")

region_statistics_2_1 <- region_statistics_2 %>% 
  filter(`Outsourcing status` == "Outsourced" & Region != "London")

london_perc <- region_statistics_2[which(region_statistics_2$Region == "London" & region_statistics_2["Outsourcing status"] == "Outsourced"), "Percentage"]

Visualised on a map:

Code
knitr::include_graphics('../outputs/figures/outsourcing_by_region.svg')

2024-05-17T18:22:58.469208 image/svg+xml Matplotlib v3.8.2, https://matplotlib.org/

Visualised on a map, excluding London to enable us to differentiate better between other regions:

Code
knitr::include_graphics('../outputs/figures/outsourcing_by_region_excl_london.svg')

2024-05-17T18:22:59.931438 image/svg+xml Matplotlib v3.8.2, https://matplotlib.org/

As we can see, London has the highest proportion of outsourced workers (25%). After London, the regions with the highest proportion of outsourced workers are:

  1. East Midlands (19%)
  2. West Midlands (18%)
  3. Wales (18%)

Age

Outsourcing status

Code
age_statistics <- data %>%
  group_by(outsourcing_status) %>%
  summarise(
    mean = weighted.mean(Age, w = NatRepemployees, na.rm = T),
    median = wtd.quantile(Age, w = NatRepemployees, probs = c(.5), na.rm = T),
    min = wtd.quantile(Age, w = NatRepemployees, probs = c(0), na.rm = T),
    max = wtd.quantile(Age, w = NatRepemployees, probs = c(1), na.rm = T),
    stdev = sqrt(wtd.var(Age, w = NatRepemployees, na.rm = T))
  )

readr::write_csv(age_statistics, file = "../outputs/data/age_stats.csv")

As shown in the table below, the median age of the outsourced group is 36 , compared to 43 for the not outsourced group.22

However, as the next figure shows, the age distribution is different for the outsourced and high indicator groups compared to the not outsourced and likely agency groups; the outsourced and high indicator groups have higher proportions of younger people (~21-36 year olds).

A t-test indicates that on average, outsourced workers are significantly younger than non-outsourced workers (t(2399.2) = 11.95, p = 0).

Code
knitr::kable(age_statistics, 
             digits = 2, 
             col.names = c("Outsourcing group",
                           "Mean",
                           "Median",
                           "Min",
                           "Max",
                           "Standard dev.")) %>%
  kable_styling(full_width = F)
Outsourcing group Mean Median Min Max Standard dev.
Not outsourced 42.80 43 16 80 13.08
Outsourced 38.63 36 16 78 13.07
Code
data %>%
  mutate(
    Age = as.numeric(as.character(as_factor(Age)))
  ) %>%
  ggplot(.,aes(Age, colour = outsourcing_status, fill = outsourcing_status)) +
  geom_density(alpha = 0.3) +
  geom_vline(data =age_statistics, aes(xintercept=median, colour = outsourcing_status)) +
  scale_x_continuous(breaks = seq(min(age_statistics$min), max(age_statistics$max),5)) +
  theme_minimal() +
  scale_colour_manual(values=colours, name = "Outsourcing status") +
  scale_fill_manual(values=colours, name = "Outsourcing status")

Outsourcing group23

Code
age_statistics_2 <- data %>%
  group_by(outsourcing_group) %>%
  summarise(
    mean = weighted.mean(Age, w = NatRepemployees, na.rm = T),
    median = wtd.quantile(Age, w = NatRepemployees, probs = c(.5), na.rm = T),
    min = wtd.quantile(Age, w = NatRepemployees, probs = c(0), na.rm = T),
    max = wtd.quantile(Age, w = NatRepemployees, probs = c(1), na.rm = T),
    stdev = sqrt(wtd.var(Age, w = NatRepemployees, na.rm = T))
  )

readr::write_csv(age_statistics_2, file = "../outputs/data/age_stats_2.csv")

Exploring the age distribution for the different outsourced groups, the high density concentration of slightly younger workers identified above appears to be driven primarily by the ‘outsourced’ and ‘high indicator’ groups. The ‘likely agency’ group follows a similar pattern, but has a lower density peak than the other groups, with a higher density of workers of more advanced ages.

Code
knitr::kable(age_statistics_2, 
             digits = 2, 
             col.names = c("Outsourcing group",
                           "Mean",
                           "Median",
                           "Min",
                           "Max",
                           "Standard dev.")) %>%
  kable_styling(full_width = F)
Outsourcing group Mean Median Min Max Standard dev.
Not outsourced 42.80 43 16 80 13.08
Outsourced 38.40 35 16 78 13.09
Likely agency 39.80 38 18 77 13.49
High indicators 38.49 35 18 72 12.55
Code
data %>%
  ggplot(.,aes(Age, colour = outsourcing_group, fill = outsourcing_group)) +
  geom_density(alpha = 0.2) +
  geom_vline(data = age_statistics_2, aes(xintercept=median, colour = outsourcing_group)) +
  scale_x_continuous(breaks = seq(min(age_statistics_2$min), max(age_statistics_2$max),5)) +
  theme_minimal() +
  scale_colour_manual(values=better_colours, name = "Outsourcing group") +
  scale_fill_manual(values=better_colours, name = "Outsourcing group")

Footnotes

  1. Data file: “outputs/data/total_outsourced.csv”↩︎

  2. Data file: outputs/data/income_stats.csv↩︎

  3. Datafile: “../outputs/data/majorgroupcode_summary.csv”↩︎

  4. Data file: “../outputs/data/majorgroupcode_summary_long.csv” & “../outputs/data/majorgroupcode_summary_wide.csv”)↩︎

  5. “../outputs/data/elementary_occs_summary.csv”↩︎

  6. “../outputs/data/process_occs_summary.csv”↩︎

  7. “../outputs/data/caring_occs_summary.csv”↩︎

  8. “outputs/data/sector_summary.csv”↩︎

  9. “outputs/data/sector_summary_2.csv”↩︎

  10. “outputs/data/sector_summary_3.csv”↩︎

  11. “outputs/sector_summary_paysplit.csv”↩︎

  12. Data file: outputs/data/ethnicity_stats_1.csv↩︎

  13. “outputs/data/ethnicity_stats_paysplit_1.csv”↩︎

  14. Data file: outputs/data/ethnicity_stats_2.csv↩︎

  15. “outputs/data/ethnicity_stats_paysplit_1.csv”↩︎

  16. Data file: outputs/data/ethnicity_stats_2.csv↩︎

  17. Data file: outputs/data/arrival_in_UK_stats.csv↩︎

  18. ../outputs/data/interaction_ethnicity_arrival_in_UK_stats.csv↩︎

  19. Data file: outputs/data/gender_statistics.csv↩︎

  20. Data file: outputs/data/region_statistics.csv↩︎

  21. ../outputs/data/region_statistics_weighted.csv↩︎

  22. Data file: outputs/data/age_stats.csv↩︎

  23. “outputs/data/age_stats_2.csv”↩︎